Skip to content

Commit

Permalink
Tune concept graph
Browse files Browse the repository at this point in the history
  • Loading branch information
Darren Edge committed Nov 10, 2024
1 parent 6fa8ee3 commit 41c9621
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion intelligence_toolkit/query_text_data/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def process_text_chunks(
self,
max_cluster_size: int = 25,
min_edge_weight: int = 2,
- min_node_degree: int = 1,
+ min_node_degree: int = 2,
callbacks=[],
) -> ProcessedChunks:
"""
Expand Down
10 changes: 5 additions & 5 deletions intelligence_toolkit/query_text_data/graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ def update_concept_graph_edges(node_to_period_counts, edge_to_period_counts, per
nps = sorted(set(TextBlob(chunk).noun_phrases))
filtered_nps = []
for np in nps:
- # split on space or newline
- parts = re.split(r"[\s\n]+", np)
- if all([re.match(r"[a-zA-Z0-9\-]+", part) for part in parts]):
- filtered_nps.append(np)
+ # split on space
+ parts = [p for p in re.split(r"[\s]+", np) if len(p) > 0]
+ if len(parts) > 1 and all([re.match(r"^[a-zA-Z0-9\-]+\n?$", part) for part in parts]):
+ filtered_nps.append(np.replace("\n", ""))
filtered_nps = sorted(filtered_nps)
for np in filtered_nps:
concept_to_cids[np].append(cid)
Expand Down Expand Up @@ -87,7 +87,7 @@ def build_meta_graph(G, hierarchical_communities):
return level_to_label_to_network


- def prepare_concept_graph(G, min_edge_weight, min_node_degree, std_trim=4):
+ def prepare_concept_graph(G, min_edge_weight, min_node_degree, std_trim=3):
degrees = [x[1] for x in G.degree()]
mean_degree = np.mean(degrees)
std_degree = np.std(degrees)
Expand Down

0 comments on commit 41c9621

Please sign in to comment.