Skip to content

Commit 4cda73b

Browse files
committed
refactor: optimize document matching logic for tag conditions
1 parent 27b7458 commit 4cda73b

File tree

1 file changed

+44
-25
lines changed

1 file changed

+44
-25
lines changed

apps/application/flow/step_node/search_document_node/impl/base_search_document_node.py

Lines changed: 44 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -121,44 +121,63 @@ def handle_custom_tags(self, document_id_list: List, search_condition_list: list
121121
compare_type = condition['compare']
122122

123123
# 构建查询条件
124-
if compare_type == 'contain':
125-
q_filter = Q(tag__key=tag_key, tag__value__icontains=field_value)
126-
elif compare_type == 'eq':
127-
q_filter = Q(tag__key=tag_key, tag__value=field_value)
128-
elif compare_type == 'not_contain':
129-
q_filter = ~Q(tag__key=tag_key, tag__value__icontains=field_value)
124+
if compare_type == 'not_contain':
125+
# 反向查询:找出包含该标签的文档,然后排除
126+
exclude_docs = set(QuerySet(DocumentTag).filter(
127+
document_id__in=matched_doc_ids,
128+
tag__key=tag_key,
129+
tag__value__icontains=field_value
130+
).values_list('document_id', flat=True).distinct())
131+
132+
matched_doc_ids = matched_doc_ids - exclude_docs
130133
else:
131-
continue
134+
if compare_type == 'contain':
135+
q_filter = Q(tag__key=tag_key, tag__value__icontains=field_value)
136+
elif compare_type == 'eq':
137+
q_filter = Q(tag__key=tag_key, tag__value=field_value)
138+
else:
139+
continue
132140

133-
# 单次查询获取符合条件的文档
134-
tag_docs = set(QuerySet(DocumentTag).filter(
135-
document_id__in=matched_doc_ids
136-
).filter(q_filter).values_list('document_id', flat=True).distinct())
141+
# 单次查询获取符合条件的文档
142+
tag_docs = set(QuerySet(DocumentTag).filter(
143+
document_id__in=matched_doc_ids
144+
).filter(q_filter).values_list('document_id', flat=True).distinct())
137145

138-
matched_doc_ids = matched_doc_ids.intersection(tag_docs)
146+
matched_doc_ids = matched_doc_ids.intersection(tag_docs)
139147

140148
return matched_doc_ids
141149

142150
else:
143-
# OR逻辑:使用一次查询完成
144-
q_objects = Q()
151+
# OR逻辑
152+
matched_docs = set()
145153

146154
for condition in search_condition_list:
147155
tag_key = condition['key']
148156
field_value = self.workflow_manage.generate_prompt(condition['value'])
149157
compare_type = condition['compare']
150158

151-
if compare_type == 'contain':
152-
q_objects |= Q(tag__key=tag_key, tag__value__icontains=field_value)
153-
elif compare_type == 'eq':
154-
q_objects |= Q(tag__key=tag_key, tag__value=field_value)
155-
elif compare_type == 'not_contain':
156-
q_objects |= ~Q(tag__key=tag_key, tag__value__icontains=field_value)
157-
158-
# 一次查询获取所有匹配的文档
159-
matched_docs = set(QuerySet(DocumentTag).filter(
160-
document_id__in=document_id_list
161-
).filter(q_objects).values_list('document_id', flat=True).distinct())
159+
if compare_type == 'not_contain':
160+
# 反向查询:找出包含该标签的文档,然后用全集减去
161+
exclude_docs = set(QuerySet(DocumentTag).filter(
162+
document_id__in=document_id_list,
163+
tag__key=tag_key,
164+
tag__value__icontains=field_value
165+
).values_list('document_id', flat=True).distinct())
166+
167+
matched_docs = matched_docs.union(set(document_id_list) - exclude_docs)
168+
else:
169+
if compare_type == 'contain':
170+
q_filter = Q(tag__key=tag_key, tag__value__icontains=field_value)
171+
elif compare_type == 'eq':
172+
q_filter = Q(tag__key=tag_key, tag__value=field_value)
173+
else:
174+
continue
175+
176+
docs = set(QuerySet(DocumentTag).filter(
177+
document_id__in=document_id_list
178+
).filter(q_filter).values_list('document_id', flat=True).distinct())
179+
180+
matched_docs = matched_docs.union(docs)
162181

163182
return matched_docs
164183

0 commit comments

Comments
 (0)