Skip to content

[Bug]: 一份markdown合同add_resource后提示错误 #300

@samhuangszu1

Description

@samhuangszu1

Bug Description

代码:
import logging
import sys
import os
import glob

# Configure logging: every record goes both to stdout and to add_resources.log.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        # Explicit UTF-8: the script logs Chinese text, which a non-UTF-8
        # locale default encoding could fail to write.
        logging.FileHandler('add_resources.log', encoding='utf-8'),
    ],
)

def add_file_to_openviking(client, file_path):
    """Add a single file to OpenViking under ``viking://resources/contract``.

    Args:
        client: Object exposing ``add_resource(path=..., target=...)``.
        file_path: Path of the file to add.

    Returns:
        The ``root_uri`` value from the ``add_resource`` result on success.
        ``None`` when the result has ``status == 'error'``, when it carries no
        ``root_uri``, or when any exception is raised while adding the file.

    NOTE(review): only plain ``dict`` results are inspected; any other result
    type falls through to the "no root_uri" fallback. Confirm against the
    OpenViking API whether ``add_resource`` can return a non-dict object.
    """
    file_name = os.path.basename(file_path)
    try:
        logging.info(f"Adding file: {file_path}")
        print(f"正在添加: {file_name}")

        res = client.add_resource(
            path=file_path,
            target="viking://resources/contract",
        )
        logging.info(f"add_resource result: {res}")

        # Check for errors in the response
        if isinstance(res, dict):
            if res.get('status') == 'error':
                errors = res.get('errors') or []
                # A single error value (e.g. one string) is treated as a
                # one-element list instead of being iterated item by item.
                if not isinstance(errors, (list, tuple)):
                    errors = [errors]
                # Stringify each entry so a non-str error object cannot make
                # the join itself raise and hide the real parse error.
                error_msg = '; '.join(str(err) for err in errors) if errors else '未知错误'
                print(f"❌ 解析失败: {file_name} - {error_msg}")
                logging.error(f"Parse error for {file_path}: {error_msg}")
                return None
            if 'root_uri' in res:
                print(f"✅ 成功添加: {file_name} -> {res['root_uri']}")
                return res['root_uri']

        # Fallback for unexpected response format
        print(f"⚠️ 添加完成: {file_name} (无root_uri)")
        return None

    except Exception as e:
        # logging.exception keeps the traceback in the log for diagnosis.
        logging.exception(f"Failed to add {file_path}: {e}")
        print(f"❌ 添加失败: {file_name} - {e}")
        return None

def add_directory_to_openviking(client, dir_path):
    """Add every regular file found recursively under ``dir_path`` to OpenViking.

    Args:
        client: Client object passed through to ``add_file_to_openviking``.
        dir_path: Directory to scan.

    Returns:
        List of the truthy URIs returned by ``add_file_to_openviking``, in
        sorted file-path order. Empty when ``dir_path`` is not a directory or
        no file was added.
    """
    if not os.path.isdir(dir_path):
        print(f"❌ 目录不存在: {dir_path}")
        return []

    print(f"扫描目录: {dir_path}")

    # Escape the directory part so characters such as "[", "]", "*" or "?" in
    # its name are matched literally instead of being read as glob syntax.
    file_pattern = os.path.join(glob.escape(os.fspath(dir_path)), "**")
    # Keep regular files only; sort so the processing order is deterministic.
    files_only = sorted(
        candidate
        for candidate in glob.glob(file_pattern, recursive=True)
        if os.path.isfile(candidate)
    )

    print(f"找到 {len(files_only)} 个文件")

    added_uris = []
    for file_path in files_only:
        uri = add_file_to_openviking(client, file_path)
        if uri:
            added_uris.append(uri)

    return added_uris

def _print_usage():
    """Print the command-line usage help."""
    print("用法:")
    print(" python add_resources.py <文件路径>")
    print(" python add_resources.py <目录路径>")
    print(" python add_resources.py <文件1> <文件2> <文件3>...")
    print("")
    print("说明:")
    print(" 所有文件将添加到固定命名空间: viking://resources/contract")
    print(" 使用 OpenViking 的 'target' 参数指定目标URI")
    print("")
    print("示例:")
    print(" python add_resources.py ./docs/contract.pdf")
    print(" python add_resources.py ./docs/")
    print(" python add_resources.py ./docs/file1.pdf ./docs/file2.docx")


def main():
    """Add every file or directory named on the command line to OpenViking.

    With no arguments, prints usage and returns. Otherwise opens an OpenViking
    client on ``./data``, adds each path, prints a summary, waits for
    processing, and closes the client. Any exception is logged and printed
    with its traceback rather than propagated.

    Returns:
        None.
    """
    if len(sys.argv) < 2:
        _print_usage()
        return

    try:
        logging.info("Starting resource addition session...")

        # Imported inside the try so an import failure is reported by the
        # handler at the bottom of this function.
        import openviking as ov
        logging.info("OpenViking imported")

        # Initialize OpenViking
        print("初始化 OpenViking...")
        client = ov.OpenViking(path="./data")
        try:
            client.initialize()
            logging.info("OpenViking initialized")
            print("✅ OpenViking 初始化完成")

            # Process all arguments (file/directory paths)
            all_uris = []
            for path in sys.argv[1:]:
                if os.path.isdir(path):
                    print(f"\n📁 处理目录: {path}")
                    all_uris.extend(add_directory_to_openviking(client, path))
                elif os.path.isfile(path):
                    print(f"\n📄 处理文件: {path}")
                    uri = add_file_to_openviking(client, path)
                    if uri:
                        all_uris.append(uri)
                else:
                    print(f"❌ 路径不存在: {path}")

            print("\n🎉 处理完成!")
            print(f"成功添加了 {len(all_uris)} 个资源")

            if all_uris:
                print("\n添加的资源URI:")
                for uri in all_uris:
                    print(f"  - {uri}")

            # Wait for processing; a failure here is reported as a warning
            # and does not abort the session.
            print("\n⏳ 等待异步处理完成...")
            try:
                client.wait_processed()
                logging.info("Async processing completed")
                print("✅ 异步处理完成")
            except Exception as e:
                logging.warning(f"等待处理时出错: {e}")
                print(f"⚠️ 处理可能仍在后台进行: {e}")
        finally:
            # Cleanup runs even when a step above raised, so the client is
            # always closed once it has been created.
            client.close()

        logging.info("Session ended")
        print("✅ 资源添加会话结束")

    except Exception as e:
        logging.error(f"Fatal error: {e}")
        print(f"💥 发生严重错误: {e}")
        import traceback
        traceback.print_exc()

# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

错误如下:
python add_resources.py ./docs/MinerU_markdown_Contract.md
2026-02-26 10:00:45,505 - INFO - Starting resource addition session...
2026-02-26 10:00:46,436 - INFO - OpenViking imported
初始化 OpenViking...
2026-02-26 10:00:46,554 - INFO - Scheduler started
2026-02-26 10:00:46,559 - INFO - Added job "PersistCollection._register_index_manage_job" to job store "default"
2026-02-26 10:00:46,720 - INFO - OpenViking initialized
✅ OpenViking 初始化完成

📄 处理文件: ./docs/MinerU_markdown_Contract.md
2026-02-26 10:00:46,721 - INFO - Adding file: ./docs/MinerU_markdown_Contract.md
正在添加: MinerU_markdown_Contract.md
2026-02-26 10:00:46,835 - openviking.parse.parsers.markdown - ERROR - [MarkdownParser] Parse failed: failed to open file: open /root/financial-rag/data/viking/temp/02261000_9ca7d4/MinerU_markdown_Contract/5初始销售期间投资者资金的管理及利息处理方式_四基金份额认购金额及付款期限_五投资冷静期_六回访制度_六基金的成立与备案_一本合同签署的方式_二基金成立的条件_三募集失败的处理方式_四基金的备案.md: file name too long
Traceback (most recent call last):
File "/root/financial-rag/venv/lib/python3.12/site-packages/pyagfs/client.py", line 179, in write
response.raise_for_status()
File "/root/financial-rag/venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 500 Server Error: Internal Server Error for url: http://localhost:1833/api/v1/files?path=%2Flocal%2Ftemp%2F02261000_9ca7d4%2FMinerU_markdown_Contract%2F5%E5%88%9D%E5%A7%8B%E9%94%80%E5%94%AE%E6%9C%9F%E9%97%B4%E6%8A%95%E8%B5%84%E8%80%85%E8%B5%84%E9%87%91%E7%9A%84%E7%AE%A1%E7%90%86%E5%8F%8A%E5%88%A9%E6%81%AF%E5%A4%84%E7%90%86%E6%96%B9%E5%BC%8F_%E5%9B%9B%E5%9F%BA%E9%87%91%E4%BB%BD%E9%A2%9D%E8%AE%A4%E8%B4%AD%E9%87%91%E9%A2%9D%E5%8F%8A%E4%BB%98%E6%AC%BE%E6%9C%9F%E9%99%90_%E4%BA%94%E6%8A%95%E8%B5%84%E5%86%B7%E9%9D%99%E6%9C%9F_%E5%85%AD%E5%9B%9E%E8%AE%BF%E5%88%B6%E5%BA%A6_%E5%85%AD%E5%9F%BA%E9%87%91%E7%9A%84%E6%88%90%E7%AB%8B%E4%B8%8E%E5%A4%87%E6%A1%88_%E4%B8%80%E6%9C%AC%E5%90%88%E5%90%8C%E7%AD%BE%E7%BD%B2%E7%9A%84%E6%96%B9%E5%BC%8F_%E4%BA%8C%E5%9F%BA%E9%87%91%E6%88%90%E7%AB%8B%E7%9A%84%E6%9D%A1%E4%BB%B6_%E4%B8%89%E5%8B%9F%E9%9B%86%E5%A4%B1%E8%B4%A5%E7%9A%84%E5%A4%84%E7%90%86%E6%96%B9%E5%BC%8F_%E5%9B%9B%E5%9F%BA%E9%87%91%E7%9A%84%E5%A4%87%E6%A1%88.md

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 188, in parse_content
await self._parse_and_create_structure(content, headings, root_dir, source_path)
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 426, in _parse_and_create_structure
await self._process_sections_with_merge(
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 458, in _process_sections_with_merge
pending = await self._try_add_to_pending(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 487, in _try_add_to_pending
await self._save_merged(viking_fs, parent_dir, pending)
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 573, in _save_merged
await viking_fs.write_file(f"{parent_dir}/{name}.md", content)
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/storage/viking_fs.py", line 872, in write_file
self.agfs.write(path, content)
File "/root/financial-rag/venv/lib/python3.12/site-packages/pyagfs/client.py", line 227, in write
self._handle_request_error(e)
File "/root/financial-rag/venv/lib/python3.12/site-packages/pyagfs/client.py", line 54, in _handle_request_error
raise AGFSClientError(error_msg)
pyagfs.exceptions.AGFSClientError: failed to open file: open /root/financial-rag/data/viking/temp/02261000_9ca7d4/MinerU_markdown_Contract/5初始销售期间投资者资金的管理及利息处理方式_四基金份额认购金额及付款期限_五投资冷静期_六回访制度_六基金的成立与备案_一本合同签署的方式_二基金成立的条件_三募集失败的处理方式_四基金的备案.md: file name too long
2026-02-26 10:00:46,837 - INFO - add_resource result: {'status': 'error', 'errors': ['Parse error: failed to open file: open /root/financial-rag/data/viking/temp/02261000_9ca7d4/MinerU_markdown_Contract/5初始销售期间投资者资金的管理及利息处理方式_四基金份额认购金额及付款期限_五投资冷静期_六回访制度_六基金的成立与备案_一本合同签署的方式_二基金成立的条件_三募集失败的处理方式_四基金的备案.md: file name too long'], 'source_path': None}
⚠️ 添加完成: MinerU_markdown_Contract.md (无root_uri)

🎉 处理完成!
成功添加了 0 个资源

⏳ 等待异步处理完成...
2026-02-26 10:00:46,841 - INFO - Async processing completed
✅ 异步处理完成
2026-02-26 10:00:46,841 - INFO - Removed job 1772071246559451622__index_manage
2026-02-26 10:00:46,842 - INFO - Scheduler has been shut down
2026-02-26 10:00:46,844 - INFO - Session ended
✅ 资源添加会话结束

Steps to Reproduce

import logging
import sys
import os
import glob

# Configure logging: every record goes both to stdout and to add_resources.log.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        # Explicit UTF-8: the script logs Chinese text, which a non-UTF-8
        # locale default encoding could fail to write.
        logging.FileHandler('add_resources.log', encoding='utf-8'),
    ],
)

def add_file_to_openviking(client, file_path):
    """Add a single file to OpenViking under ``viking://resources/contract``.

    Args:
        client: Object exposing ``add_resource(path=..., target=...)``.
        file_path: Path of the file to add.

    Returns:
        The ``root_uri`` value from the ``add_resource`` result on success.
        ``None`` when the result has ``status == 'error'``, when it carries no
        ``root_uri``, or when any exception is raised while adding the file.

    NOTE(review): only plain ``dict`` results are inspected; any other result
    type falls through to the "no root_uri" fallback. Confirm against the
    OpenViking API whether ``add_resource`` can return a non-dict object.
    """
    file_name = os.path.basename(file_path)
    try:
        logging.info(f"Adding file: {file_path}")
        print(f"正在添加: {file_name}")

        res = client.add_resource(
            path=file_path,
            target="viking://resources/contract",
        )
        logging.info(f"add_resource result: {res}")

        # Check for errors in the response
        if isinstance(res, dict):
            if res.get('status') == 'error':
                errors = res.get('errors') or []
                # A single error value (e.g. one string) is treated as a
                # one-element list instead of being iterated item by item.
                if not isinstance(errors, (list, tuple)):
                    errors = [errors]
                # Stringify each entry so a non-str error object cannot make
                # the join itself raise and hide the real parse error.
                error_msg = '; '.join(str(err) for err in errors) if errors else '未知错误'
                print(f"❌ 解析失败: {file_name} - {error_msg}")
                logging.error(f"Parse error for {file_path}: {error_msg}")
                return None
            if 'root_uri' in res:
                print(f"✅ 成功添加: {file_name} -> {res['root_uri']}")
                return res['root_uri']

        # Fallback for unexpected response format
        print(f"⚠️ 添加完成: {file_name} (无root_uri)")
        return None

    except Exception as e:
        # logging.exception keeps the traceback in the log for diagnosis.
        logging.exception(f"Failed to add {file_path}: {e}")
        print(f"❌ 添加失败: {file_name} - {e}")
        return None

def add_directory_to_openviking(client, dir_path):
    """Add every regular file found recursively under ``dir_path`` to OpenViking.

    Args:
        client: Client object passed through to ``add_file_to_openviking``.
        dir_path: Directory to scan.

    Returns:
        List of the truthy URIs returned by ``add_file_to_openviking``, in
        sorted file-path order. Empty when ``dir_path`` is not a directory or
        no file was added.
    """
    if not os.path.isdir(dir_path):
        print(f"❌ 目录不存在: {dir_path}")
        return []

    print(f"扫描目录: {dir_path}")

    # Escape the directory part so characters such as "[", "]", "*" or "?" in
    # its name are matched literally instead of being read as glob syntax.
    file_pattern = os.path.join(glob.escape(os.fspath(dir_path)), "**")
    # Keep regular files only; sort so the processing order is deterministic.
    files_only = sorted(
        candidate
        for candidate in glob.glob(file_pattern, recursive=True)
        if os.path.isfile(candidate)
    )

    print(f"找到 {len(files_only)} 个文件")

    added_uris = []
    for file_path in files_only:
        uri = add_file_to_openviking(client, file_path)
        if uri:
            added_uris.append(uri)

    return added_uris

def _print_usage():
    """Print the command-line usage help."""
    print("用法:")
    print(" python add_resources.py <文件路径>")
    print(" python add_resources.py <目录路径>")
    print(" python add_resources.py <文件1> <文件2> <文件3>...")
    print("")
    print("说明:")
    print(" 所有文件将添加到固定命名空间: viking://resources/contract")
    print(" 使用 OpenViking 的 'target' 参数指定目标URI")
    print("")
    print("示例:")
    print(" python add_resources.py ./docs/contract.pdf")
    print(" python add_resources.py ./docs/")
    print(" python add_resources.py ./docs/file1.pdf ./docs/file2.docx")


def main():
    """Add every file or directory named on the command line to OpenViking.

    With no arguments, prints usage and returns. Otherwise opens an OpenViking
    client on ``./data``, adds each path, prints a summary, waits for
    processing, and closes the client. Any exception is logged and printed
    with its traceback rather than propagated.

    Returns:
        None.
    """
    if len(sys.argv) < 2:
        _print_usage()
        return

    try:
        logging.info("Starting resource addition session...")

        # Imported inside the try so an import failure is reported by the
        # handler at the bottom of this function.
        import openviking as ov
        logging.info("OpenViking imported")

        # Initialize OpenViking
        print("初始化 OpenViking...")
        client = ov.OpenViking(path="./data")
        try:
            client.initialize()
            logging.info("OpenViking initialized")
            print("✅ OpenViking 初始化完成")

            # Process all arguments (file/directory paths)
            all_uris = []
            for path in sys.argv[1:]:
                if os.path.isdir(path):
                    print(f"\n📁 处理目录: {path}")
                    all_uris.extend(add_directory_to_openviking(client, path))
                elif os.path.isfile(path):
                    print(f"\n📄 处理文件: {path}")
                    uri = add_file_to_openviking(client, path)
                    if uri:
                        all_uris.append(uri)
                else:
                    print(f"❌ 路径不存在: {path}")

            print("\n🎉 处理完成!")
            print(f"成功添加了 {len(all_uris)} 个资源")

            if all_uris:
                print("\n添加的资源URI:")
                for uri in all_uris:
                    print(f"  - {uri}")

            # Wait for processing; a failure here is reported as a warning
            # and does not abort the session.
            print("\n⏳ 等待异步处理完成...")
            try:
                client.wait_processed()
                logging.info("Async processing completed")
                print("✅ 异步处理完成")
            except Exception as e:
                logging.warning(f"等待处理时出错: {e}")
                print(f"⚠️ 处理可能仍在后台进行: {e}")
        finally:
            # Cleanup runs even when a step above raised, so the client is
            # always closed once it has been created.
            client.close()

        logging.info("Session ended")
        print("✅ 资源添加会话结束")

    except Exception as e:
        logging.error(f"Fatal error: {e}")
        print(f"💥 发生严重错误: {e}")
        import traceback
        traceback.print_exc()

# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Expected Behavior

能正常添加内容

Actual Behavior

如bug描述

Minimal Reproducible Example

Error Logs

OpenViking Version

openviking-0.1.17.dist-info openviking_cli

Python Version

3.12.0

Operating System

Linux

Model Backend

None

Additional Context

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    Status

    Done

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions