Skip to content

Commit f2ed30d

Browse files
chenjwqin-ctx
authored and committed
Fix/memory v2 (#1450)
1 parent 7f85ec7 commit f2ed30d

File tree

2 files changed

+9
-70
lines changed

2 files changed

+9
-70
lines changed

examples/openclaw-plugin/text-utils.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,10 @@ export function sanitizeUserTextForCapture(text: string): string {
5858
// 格式: "System: [时间] Compacted ... Context ... [时间] 实际内容"
5959
if (COMPACTED_SYSTEM_MSG_RE.test(text)) {
6060
// 提取最后一个 ] 之后的内容(即实际用户输入)
61-
const match = text.match(/\]\s*(.+)$/);
62-
if (match && match[1]) {
63-
return match[1].replace(/\s+/g, " ").trim();
61+
const lastBracketIndex = text.lastIndexOf("]");
62+
if (lastBracketIndex !== -1) {
63+
const content = text.slice(lastBracketIndex + 1);
64+
return content.replace(/\s+/g, " ").trim();
6465
}
6566
return "";
6667
}

openviking/session/session.py

Lines changed: 5 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -182,24 +182,6 @@ def __init__(
182182

183183
logger.info(f"Session created: {self.session_id} for user {self.user}")
184184

185-
async def _get_latest_archive_last_msg_time(self) -> Optional[float]:
186-
"""获取上一次归档的最后一条消息的时间戳(毫秒),用于过滤需要提取记忆的消息。"""
187-
# 使用 compression_index - 1 获取上一次归档的索引
188-
if self._compression.compression_index <= 1:
189-
return None
190-
# 获取上一次归档的目录
191-
archive_uri = (
192-
f"{self._session_uri}/history/archive_{self._compression.compression_index - 1:03d}"
193-
)
194-
messages = await self._read_archive_messages(archive_uri)
195-
if messages and messages[-1].created_at:
196-
# 解析 ISO 时间戳为毫秒
197-
from dateutil import parser
198-
199-
dt = parser.parse(messages[-1].created_at)
200-
return dt.timestamp() * 1000
201-
return None
202-
203185
async def load(self):
204186
"""Load session data from storage."""
205187
if self._loaded:
@@ -454,11 +436,6 @@ async def commit_async(self) -> Dict[str, Any]:
454436
await self._save_meta()
455437

456438
self._compression.original_count += len(messages_to_archive)
457-
# 从最新归档中获取上一次归档的时间戳(用于过滤需要提取记忆的消息)
458-
previous_archive_time = await self._get_latest_archive_last_msg_time()
459-
logger.info(
460-
f"commit_async: previous_archive_time from archive={previous_archive_time}, compression_index={self._compression.compression_index}"
461-
)
462439
logger.info(
463440
f"Archived: {len(messages_to_archive)} messages → "
464441
f"history/archive_{self._compression.compression_index:03d}/"
@@ -478,38 +455,14 @@ async def commit_async(self) -> Dict[str, Any]:
478455
owner_user_id=self.ctx.user.user_id,
479456
)
480457

481-
# 只传递上一次归档之后的新消息给 memory extraction
482-
# 使用 previous_archive_time 过滤(在设置新时间之前)
483-
messages_for_extraction = messages_to_archive
484-
logger.info(
485-
f"Memory extraction filter: previous_archive_time={previous_archive_time}, "
486-
f"messages_count={len(messages_to_archive)}"
487-
)
488-
if previous_archive_time:
489-
archive_time_str = datetime.fromtimestamp(
490-
previous_archive_time / 1000, timezone.utc
491-
).isoformat()
492-
# 打印前几条消息的 created_at 用于调试
493-
if messages_to_archive:
494-
logger.info(
495-
f"Debug: first msg created_at={messages_to_archive[0].created_at}, "
496-
f"archive_time_str={archive_time_str}"
497-
)
498-
messages_for_extraction = [
499-
m for m in messages_to_archive if m.created_at and m.created_at > archive_time_str
500-
]
501-
logger.info(
502-
f"Memory extraction: {len(messages_to_archive)} total -> {len(messages_for_extraction)} new (after {previous_archive_time})"
503-
)
504-
505458
asyncio.create_task(
506459
self._run_memory_extraction(
507460
task_id=task.task_id,
508461
archive_uri=archive_uri,
509-
messages=messages_for_extraction,
462+
messages=messages_to_archive,
510463
usage_records=usage_snapshot,
511-
first_message_id=messages_for_extraction[0].id if messages_for_extraction else "",
512-
last_message_id=messages_for_extraction[-1].id if messages_for_extraction else "",
464+
first_message_id=messages_to_archive[0].id if messages_to_archive else "",
465+
last_message_id=messages_to_archive[-1].id if messages_to_archive else "",
513466
)
514467
)
515468

@@ -778,21 +731,6 @@ async def get_session_context(self, token_budget: int = 128_000) -> Dict[str, An
778731
context = await self._collect_session_context_components()
779732
merged_messages = context["messages"]
780733

781-
# 过滤:只返回上一次归档之后的最新消息(从最新归档获取时间戳)
782-
previous_time = await self._get_latest_archive_last_msg_time()
783-
if previous_time:
784-
original_count = len(merged_messages)
785-
archive_time_str = datetime.fromtimestamp(
786-
previous_time / 1000, timezone.utc
787-
).isoformat()
788-
merged_messages = [
789-
m for m in merged_messages if m.created_at and m.created_at > archive_time_str
790-
]
791-
logger.info(
792-
f"[get_session_context] filtered messages: {original_count} -> {len(merged_messages)}, "
793-
f"after archive time={previous_time}"
794-
)
795-
796734
message_tokens = sum(m.estimated_tokens for m in merged_messages)
797735

798736
# 精简日志:只打印关键信息
@@ -811,7 +749,7 @@ async def get_session_context(self, token_budget: int = 128_000) -> Dict[str, An
811749
if include_latest_overview:
812750
remaining_budget -= latest_archive_tokens
813751

814-
# 不再返回 pre_archive_abstracts(只保留 latest_archive_overview)
752+
# pre_archive_abstracts: 保留字段返回空数组,保持 API 向下兼容
815753
included_pre_archive_abstracts: List[Dict[str, str]] = []
816754
pre_archive_tokens = 0
817755

@@ -825,7 +763,7 @@ async def get_session_context(self, token_budget: int = 128_000) -> Dict[str, An
825763
"latest_archive_overview": (
826764
latest_archive["overview"] if include_latest_overview else ""
827765
),
828-
# 不再返回 pre_archive_abstracts
766+
"pre_archive_abstracts": [], # 保持 API 向后兼容,返回空数组
829767
"messages": [m.to_dict() for m in merged_messages],
830768
"estimatedTokens": message_tokens + archive_tokens,
831769
"stats": {

0 commit comments

Comments
 (0)