-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathast_analyzer.py
165 lines (147 loc) · 6.42 KB
/
ast_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
"""
Module to perform AST analysis using Tree-sitter and generate a basic dependency graph.
Now supports both Python and JavaScript.
Requires: pip install tree_sitter networkx
"""
import sys
import logging
import networkx as nx
from tree_sitter import Language, Parser
# Module-level logger; configuration is left to the importing application.
logger = logging.getLogger(__name__)

# Relative path of the compiled Tree-sitter grammar library that is expected
# to provide both the "python" and the "javascript" grammars.
TREE_SITTER_LANGUAGE_PATH = "build/my-languages.so"

# The Python grammar is mandatory: if it cannot be loaded, sys.exit() is
# called with an error message while this module is being imported.
try:
    PY_LANGUAGE = Language(TREE_SITTER_LANGUAGE_PATH, "python")
except Exception as e:
    sys.exit(f"Error loading Tree-sitter language library for Python: {e}")

# The JavaScript grammar is optional: a load failure is logged and
# JS_LANGUAGE is set to None, which leaves parser_js as None below.
try:
    JS_LANGUAGE = Language(TREE_SITTER_LANGUAGE_PATH, "javascript")
except Exception as e:
    logger.error(f"Error loading Tree-sitter language library for JavaScript: {e}")
    JS_LANGUAGE = None

# One shared parser per language, created once at import time.
parser_py = Parser()
parser_py.set_language(PY_LANGUAGE)

# parser_js stays None unless the JavaScript grammar was loaded.
parser_js = None
if JS_LANGUAGE:
    parser_js = Parser()
    parser_js.set_language(JS_LANGUAGE)
def analyze_python_file_treesitter(content, llm_hunt=False):
    """
    Parse Python source with Tree-sitter and list its function and class definitions.

    Args:
        content: Python source code as a ``str``; it is encoded to UTF-8
            before being handed to the parser.
        llm_hunt: When true, every entry's ``"llm_hint"`` holds a short prompt
            naming the definition; otherwise it is an empty string.
            NOTE(review): the name looks like a typo for ``llm_hint``; it is
            kept unchanged so existing keyword callers keep working.

    Returns:
        ``{"functions": [...], "classes": [...]}``. Every entry is a dict with
        the keys ``"name"``, ``"start_point"``, ``"end_point"`` and
        ``"llm_hint"``. Entries are collected in pre-order, and definitions
        nested inside other definitions are included. If encoding or parsing
        raises, the error is logged and ``{"error": "..."}`` is returned
        instead.
    """
    try:
        byte_content = content.encode("utf8")
        tree = parser_py.parse(byte_content)
    except Exception as e:
        logger.error(f"Tree-sitter parse error (Python): {e}")
        return {"error": f"Tree-sitter parse error: {e}"}

    functions = []
    classes = []

    # Node type -> (result list, hint template) for the definitions we report.
    targets = {
        "function_definition": (
            functions,
            "Examine the function '{name}' to determine its role and business logic.",
        ),
        "class_definition": (
            classes,
            "Analyze the class '{name}' to understand its methods and responsibilities.",
        ),
    }

    # Walk the tree with an explicit stack rather than recursion: a deeply
    # nested syntax tree would otherwise raise RecursionError once Python's
    # recursion limit is exceeded.
    pending = [tree.root_node]
    while pending:
        node = pending.pop()
        target = targets.get(node.type)
        if target is not None:
            bucket, hint_template = target
            # The definition's name is its first direct "identifier" child.
            name_node = next(
                (child for child in node.children if child.type == "identifier"),
                None,
            )
            if name_node is not None:
                name = byte_content[name_node.start_byte:name_node.end_byte].decode("utf8")
                bucket.append({
                    "name": name,
                    "start_point": node.start_point,
                    "end_point": node.end_point,
                    "llm_hint": hint_template.format(name=name) if llm_hunt else "",
                })
        # Push children reversed so they are popped left-to-right, which
        # keeps the same pre-order as a recursive walk.
        pending.extend(reversed(node.children))

    return {"functions": functions, "classes": classes}
def analyze_javascript_file_treesitter(content, llm_hunt=False):
    """
    Parse JavaScript source with Tree-sitter and list its function and class declarations.

    Only nodes of type ``function_declaration`` and ``class_declaration`` are
    reported; every other construct is ignored.

    Args:
        content: JavaScript source code as a ``str``; it is encoded to UTF-8
            before being handed to the parser.
        llm_hunt: When true, every entry's ``"llm_hint"`` holds a short prompt
            naming the declaration; otherwise it is an empty string.
            NOTE(review): the name looks like a typo for ``llm_hint``; it is
            kept unchanged so existing keyword callers keep working.

    Returns:
        ``{"functions": [...], "classes": [...]}``. Every entry is a dict with
        the keys ``"name"``, ``"start_point"``, ``"end_point"`` and
        ``"llm_hint"``. Entries are collected in pre-order, and declarations
        nested inside other declarations are included. Returns
        ``{"error": "..."}`` when the JavaScript parser is unavailable or when
        encoding or parsing raises (the latter is also logged).
    """
    if not parser_js:
        return {"error": "JavaScript parser is not available."}
    try:
        byte_content = content.encode("utf8")
        tree = parser_js.parse(byte_content)
    except Exception as e:
        logger.error(f"Tree-sitter parse error (JavaScript): {e}")
        return {"error": f"Tree-sitter parse error: {e}"}

    functions = []
    classes = []

    # Node type -> (result list, hint template) for the declarations we report.
    targets = {
        "function_declaration": (
            functions,
            "Inspect the JavaScript function '{name}' to understand its functionality.",
        ),
        "class_declaration": (
            classes,
            "Examine the JavaScript class '{name}' for its properties and methods.",
        ),
    }

    # Walk the tree with an explicit stack rather than recursion: a deeply
    # nested syntax tree would otherwise raise RecursionError once Python's
    # recursion limit is exceeded.
    pending = [tree.root_node]
    while pending:
        node = pending.pop()
        target = targets.get(node.type)
        if target is not None:
            bucket, hint_template = target
            # The declaration's name is its first direct "identifier" child.
            name_node = next(
                (child for child in node.children if child.type == "identifier"),
                None,
            )
            if name_node is not None:
                name = byte_content[name_node.start_byte:name_node.end_byte].decode("utf8")
                bucket.append({
                    "name": name,
                    "start_point": node.start_point,
                    "end_point": node.end_point,
                    "llm_hint": hint_template.format(name=name) if llm_hunt else "",
                })
        # Push children reversed so they are popped left-to-right, which
        # keeps the same pre-order as a recursive walk.
        pending.extend(reversed(node.children))

    return {"functions": functions, "classes": classes}
def generate_dependency_graph(functions):
    """
    Build a placeholder dependency graph from a list of function entries.

    Each entry must provide ``"name"``, ``"start_point"`` and ``"end_point"``.
    One node is created per function name, carrying ``start`` and ``end``
    attributes. No edges are added: call analysis is not implemented yet.

    Returns the graph in NetworkX node-link form, with the edge list stored
    under the ``"links"`` key.
    """
    dep_graph = nx.DiGraph()
    # Each (node, attribute-dict) pair adds the node and sets its attributes.
    dep_graph.add_nodes_from(
        (entry["name"], {"start": entry["start_point"], "end": entry["end_point"]})
        for entry in functions
    )
    # Placeholder: edges can be added by analyzing function calls.
    # Passing edges= explicitly avoids the FutureWarning about the default key.
    return nx.node_link_data(dep_graph, edges="links")
def analyze_file(content, language="python", llm_hunt=False):
    """
    Run the AST analysis that matches ``language`` and attach a dependency graph.

    ``language`` is matched case-insensitively against "python" and
    "javascript". When the analysis result contains a "functions" list, a
    "dependency_graph" entry built from it is added to the result. Any other
    language yields an ``{"error": ...}`` dictionary.
    """
    lang_key = language.lower()
    if lang_key == "python":
        analyzer = analyze_python_file_treesitter
    elif lang_key == "javascript":
        analyzer = analyze_javascript_file_treesitter
    else:
        return {"error": f"AST analysis for language '{language}' is not supported yet."}

    result = analyzer(content, llm_hunt=llm_hunt)
    # Error results carry no "functions" key and are returned unchanged.
    if "functions" in result:
        result["dependency_graph"] = generate_dependency_graph(result["functions"])
    return result