Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/qtd-pre-processing' into qtd-pre-processing
Browse files Browse the repository at this point in the history
  • Loading branch information
dayesouza committed Nov 8, 2024
2 parents d08931a + bd9da6a commit a0b8f8f
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
10 changes: 7 additions & 3 deletions intelligence_toolkit/query_text_data/answer_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def build_report_markdown(
matched_chunks = {
f"{text['title']} ({text['chunk_id']})": text for text in text_jsons
}
home_link = "#" + content_structure["report_title"].replace(" ", "-").lower()
home_link = "#report"
report = f'# Report\n\n## Query\n\n*{query}*\n\n## Expanded Query\n\n*{expanded_query}*\n\n## Answer\n\n{content_structure["answer"]}\n\n## Analysis\n\n### {content_structure["report_title"]}\n\n{content_structure["report_summary"]}\n\n'
for theme in content_structure["theme_order"]:
report += f'#### Theme: {theme["theme_title"]}\n\n{theme["theme_summary"]}\n\n'
Expand Down Expand Up @@ -378,8 +378,12 @@ def cluster_cids(relevant_cids, cid_to_vector, target_clusters):
clustered_cids = {}
if len(relevant_cids) > 0:
# use k-means clustering to group relevant cids into target_clusters clusters
cids = [cid for cid in relevant_cids]
vectors = [cid_to_vector[cid] for cid in cids]
cids = []
vectors = []
for relevant_cid in relevant_cids:
if relevant_cid in cid_to_vector:
cids.append(relevant_cid)
vectors.append(cid_to_vector[relevant_cid])
kmeans = cluster.KMeans(n_clusters=target_clusters)
kmeans.fit(vectors)
cluster_assignments = kmeans.predict(vectors)
Expand Down
3 changes: 2 additions & 1 deletion intelligence_toolkit/query_text_data/relevance_assessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,9 @@ async def detect_relevant_chunks(
community_mean_rank = []

for community, cids in level_to_community_to_candidate_cids[level].items():
filtered_cids = [c for c in cids if c in semantic_search_cids]
mean_rank = np.mean(
sorted([semantic_search_cids.index(c) for c in cids])[
sorted([semantic_search_cids.index(c) for c in filtered_cids])[
: chunk_search_config.community_ranking_chunks
]
)
Expand Down

0 comments on commit a0b8f8f

Please sign in to comment.