Skip to content

Commit 65465fd

Browse files
committed
Reformat code and enable run with command line arguments
1 parent bfaf77c commit 65465fd

File tree

4 files changed

+233
-54
lines changed

4 files changed

+233
-54
lines changed

README.md

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# GitXApiAnalysis
2+
3+
This project is used to analyse git traces.
4+
5+
To do so, git projects are converted to xApi files, then the program tries to classify the commits.
6+
7+
## Running
8+
9+
### Generate files
10+
11+
`python main.py path/to/repo -g`
12+
Output location can be customized with `-o`
13+
14+
### Run analysis
15+
16+
`python main.py path/to/xapi`
17+
Output location can be customized with `-o`
18+
19+
## Output Format
20+
21+
The output consists mainly of one file named "'repo'_processed.json", which is an xApi file containing a list of statements.
22+
Each statement may be either one of the original commits or one generated by the program in an effort to split commits into atomic tasks.
23+
24+
Classified statements have a 'task' field in the 'context' part of the statement.
25+
26+
Full list of current tasks:
27+
28+
- ADD_RETURN_VALUE
29+
- ADD_VARIABLE_VALUE
30+
- AnnotationAdd
31+
- AnnotationRemove
32+
- ASSIGN_VARIABLE
33+
- BlockAdd
34+
- BlockRemove
35+
- CHANGE_METHOD_INVOCATED
36+
- CHANGE_VARIABLE_USED
37+
- CHANGE_VARIABLE_VALUE
38+
- CHANGE_LITTERAL_VALUE
39+
- LITTERAL_TO_VARIABLE
40+
- VARIABLE_TO_LITTERAL
41+
- ClassAdd
42+
- ClassRemove
43+
- COMMENT_ADDITION
44+
- COMMENT_DELETION
45+
- COMMENT_EDITION
46+
- COMMENT_MOVED
47+
- CONDITION
48+
- ADD_FUNCTION_PARAMETER
49+
- REMOVE_FUNCTION_PARAMETER
50+
- EDIT_FUNCTION_PARAMETERS
51+
- EDIT_RETURN_VALUE
52+
- CHANGE_LITTERAL_VALUE
53+
- CutPaste
54+
- DeclarationStatementAdd
55+
- DeclarationStatementRemove
56+
- EmptyCommit
57+
- EmptyLine
58+
- ForAdd
59+
- ForRemove
60+
- IfAdd
61+
- IfRemove
62+
- SyntaxTypoAdd
63+
- SyntaxTypoRemove
64+
- FunctionAdd
65+
- FunctionRemove
66+
- ImportStatementAdd
67+
- ImportStatementRemove
68+
- InvocationStatementAdd
69+
- InvocationStatementRemove
70+
- LineBreakChange
71+
- LineSpacingChange
72+
- ModifyVariableValue
73+
- NotSource
74+
- PackageStatementAdd
75+
- PackageStatementRemove
76+
- RefactoringMiner
77+
- RENAME_VARIABLE
78+
- RENAME_FUNCTION
79+
- REPLACE_FUNCTION
80+
- REPLACE_METHOD
81+
- ReturnStatementAdd
82+
- ReturnStatementRemove
83+
- STRING_EDITION
84+
- MODIFY_VARIABLE_VALUE
85+
- TrimEditionContent
86+
- TRUE_TO_FALSE
87+
- FALSE_TO_TRUE
88+
- TYPO_ADD
89+
- TYPO_DEL
90+
- UNASSIGN_VARIABLE
91+
- USE_NEW_VARIABLE
92+
- USE_OLD_VARIABLE
93+
- VariableAssignStatementAdd
94+
- VariableAssignStatementRemove

identifier/tasks_identifier/code_task/RefactoringMinerTask.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88

99
class RefactoringMinerTask(CodeTaskIdentifier):
1010

11-
def __init__(self) -> None:
12-
with open("refactoring.json") as f:
11+
def __init__(self, file) -> None:
12+
with open(file) as f:
1313
self.json = dict(
1414
[
1515
(c["sha1"], c["refactorings"])

main.py

Lines changed: 75 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import GitToXApi.utils
23
from tincan import Statement
34
import GitToXApi.differential
45
from GitToXApi.utils import *
@@ -9,8 +10,9 @@
910
import debug
1011
import shutil
1112
import os
12-
import pm4py
13-
from pm4py.objects.log.obj import EventLog, Trace, Event
13+
from xes_file import *
14+
import argparse
15+
import subprocess
1416

1517

1618
def format_statement(st: Statement) -> Statement:
@@ -45,9 +47,12 @@ def exec_modifier(statements: list[Statement], identifier: CodeModifier):
4547

4648

4749
def dump(
48-
name: str, statements: list[Statement], filter: Callable[[Statement], bool]
50+
name: str,
51+
out: str,
52+
statements: list[Statement],
53+
filter: Callable[[Statement], bool],
4954
) -> None:
50-
with open("out/" + name + ".json", "w") as f:
55+
with open(out + name + ".json", "w") as f:
5156
f.write(
5257
json.dumps(
5358
[stmt.as_version() for stmt in statements if filter(stmt)],
@@ -56,18 +61,44 @@ def dump(
5661
)
5762

5863

59-
if __name__ == "__main__":
64+
def generate_files(repo: Repo, out_folder, repo_name: str):
    """Generate the xApi and RefactoringMiner files for a repository.

    Each output file is only produced when it does not already exist, so
    previously generated files are never overwritten.

    Args:
        repo: Repository to export. Only ``repo.git_dir`` is read here; the
            object itself is handed to ``GitToXApi.utils.generate_xapi``.
        out_folder: Prefix prepended verbatim to the output file names, so it
            must be empty or end with a path separator.
        repo_name: Base name of the generated files
            (``<repo_name>.json`` and ``<repo_name>_refactoring.json``).
    """
    # The repository path is the parent of the git directory; os.path.dirname
    # also copes with platform-specific separators, unlike slicing on "/".
    repo_path = os.path.dirname(repo.git_dir)
    print("Getting files for", repo_path)
    stmts = GitToXApi.utils.generate_xapi(repo, {})

    dest_xapi = out_folder + repo_name + ".json"
    if not os.path.exists(dest_xapi):
        print("Generation of xapi files for", repo_path)
        with open(dest_xapi, "w") as f:
            f.write(GitToXApi.utils.serialize_statements(stmts, indent=2))

    dest_refactoring = out_folder + repo_name + "_refactoring.json"
    if not os.path.exists(dest_refactoring):
        print("Generation of refactoring files for", repo_path)
        # Resolve the executable explicitly and pass the arguments as a list:
        # no shell is involved, so paths containing spaces or shell
        # metacharacters are passed through unchanged.
        miner = shutil.which("RefactoringMiner")
        if miner is None:
            print("RefactoringMiner executable not found, skipping", repo_path)
        else:
            result = subprocess.run(
                [miner, "-a", repo_path, "-json", dest_refactoring],
                capture_output=True,
            )
            # Generation stays best-effort, but a failure is now reported
            # instead of being silently discarded with the captured output.
            if result.returncode != 0:
                print("RefactoringMiner failed for", repo_path)
                print(result.stderr.decode(errors="replace").strip())

    print("Files where collected for", repo_path)
85+
86+
87+
def process_file(path: str, out: str):
88+
89+
refactoring_file = path[: path.rfind(".")] + "_refactoring.json"
6090

6191
initial_statements = None
62-
with open("original.json") as f:
92+
with open(path) as f:
6393
initial_statements = deserialize_statements(f)
94+
initial_statements = [format_statement(s) for s in initial_statements]
6495
initial_total = len(initial_statements)
6596
statements = copy.deepcopy(initial_statements)
6697

6798
code_modifiers = [
6899
PreciseVerbModifier(),
69100
NotSourceTask(),
70-
RefactoringMinerTask(),
101+
RefactoringMinerTask(refactoring_file),
71102
TrimEditionContentModifier(),
72103
LineBreakAndSpacingChangeTask(),
73104
CutPasteTask(),
@@ -97,9 +128,6 @@ def dump(
97128
for modif in code_modifiers:
98129
statements = exec_modifier(statements, modif)
99130

100-
shutil.rmtree("out", ignore_errors=True)
101-
os.mkdir("./out")
102-
103131
scores = {"UNKNOWN": 0}
104132

105133
for st in statements:
@@ -117,16 +145,20 @@ def dump(
117145
safe_name: str = k.lower().replace(" ", "_")
118146
dump(
119147
safe_name,
148+
out,
120149
statements,
121150
lambda x: "task" in x.context.extensions
122151
and x.context.extensions["task"]["id"] == k,
123152
)
124153
dump(
125154
"unknown",
155+
out,
126156
statements,
127157
lambda x: not "task" in x.context.extensions,
128158
)
129159

160+
dump(path[: path.rfind(".")] + "_processed.json", out, statements, lambda x: True)
161+
130162
scores = [(k, scores[k]) for k in scores]
131163
scores.sort(key=lambda v: -v[1])
132164

@@ -136,54 +168,45 @@ def dump(
136168
]
137169

138170
if debug.GENERATE_XES_FROM_INITIAL:
139-
keys = [st.object.id for st in initial_statements]
140-
141-
event_log = EventLog()
142-
trace = Trace()
143-
144-
for key in keys:
145-
event = Event()
146-
classes = set()
147-
for st in statements:
148-
task = TaskIdentifier.get_task(st)
149-
if task == None or not "origins" in st.context.extensions:
150-
continue
151-
if not key in st.context.extensions["origins"]:
152-
continue
153-
classes.add(task[0])
154-
if len(classes) == 0:
155-
classes.add("UNKNOWN")
156-
classes = list(classes)
157-
classes.sort()
158-
event["concept:name"] = "/".join(classes)
159-
event["org:resource"] = st.object.definition.description["en-US"]
160-
event["time:timestamp"] = st.timestamp
161-
trace.append(event)
162-
163-
event_log.append(trace)
164-
165-
pm4py.write_xes(event_log, "out/initial.xes")
171+
generate_xes_from_initial(
172+
initial_statements, statements, out=out + "initial.xes"
173+
)
166174

167175
if debug.GENERATE_XES_FROM_CREATED:
176+
generate_xes_from_created(statements, out=out + "created.xes")
168177

169-
event_log = EventLog()
170-
trace = Trace()
171178

172-
for st in statements:
173-
event = Event()
179+
if __name__ == "__main__":
180+
parser = argparse.ArgumentParser(
181+
prog="GitXApiAnalysis",
182+
description="Program used to analyze git xapi files",
183+
)
184+
185+
parser.add_argument("filename")
186+
parser.add_argument(
187+
"--generate", "-g", action=argparse.BooleanOptionalAction, default=False
188+
)
189+
parser.add_argument("-o", "--out", default="")
190+
191+
args = parser.parse_args()
174192

175-
clazz = "UNKNOWN"
176-
task = TaskIdentifier.get_task(st)
177-
if task != None:
178-
clazz = task[0]
193+
filename = args.filename
194+
generate = args.generate
195+
out_folder = args.out
179196

180-
if clazz == "EmptyCommit":
181-
continue
197+
if out_folder != "" and out_folder[-1] != "/":
198+
out_folder += "/"
199+
if filename[-1] == "/":
200+
filename = filename[:-1]
182201

183-
event["concept:name"] = clazz
184-
event["time:timestamp"] = st.timestamp
185-
trace.append(event)
202+
if out_folder != "" and not os.path.exists(out_folder):
203+
os.makedirs(out_folder)
186204

187-
event_log.append(trace)
205+
if not os.path.exists(filename):
206+
raise FileNotFoundError(filename)
188207

189-
pm4py.write_xes(event_log, "out/artificial.xes")
208+
if generate:
209+
repo = Repo(filename)
210+
generate_files(repo, out_folder, filename)
211+
else:
212+
process_file(filename, out_folder)

xes_file.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from GitToXApi import *
2+
from pm4py.objects.log.obj import EventLog, Trace, Event
3+
from pm4py import write_xes
4+
from identifier.tasks_identifier import TaskIdentifier
5+
6+
7+
def generate_xes_from_initial(
    initial: list[Statement], created: list[Statement], out="out/initial.xes"
):
    """Write an XES log containing one event per initial statement.

    The event name of an initial statement is the sorted, "/"-joined set of
    task names of every created statement that lists the initial statement's
    object id in its ``origins`` context extension, or ``"UNKNOWN"`` when no
    classified created statement refers to it.

    Args:
        initial: Original statements; one event is emitted for each of them.
        created: Statements produced by the analysis, used to label events.
        out: Path of the XES file to write.
    """
    # Resolve the task and origins of every created statement once, instead
    # of recomputing them for each initial statement.
    labelled = []
    for st in created:
        task = TaskIdentifier.get_task(st)
        extensions = st.context.extensions
        if task is None or "origins" not in extensions:
            continue
        labelled.append((task[0], extensions["origins"]))

    event_log = EventLog()
    trace = Trace()

    for origin in initial:
        key = origin.object.id
        classes = {name for name, origins in labelled if key in origins}
        if not classes:
            classes.add("UNKNOWN")

        event = Event()
        event["concept:name"] = "/".join(sorted(classes))
        # Resource and timestamp come from the initial statement this event
        # stands for. Previously they were read from the loop variable left
        # over from scanning `created`, i.e. always from its last element
        # (and it was undefined when `created` was empty).
        event["org:resource"] = origin.object.definition.description["en-US"]
        event["time:timestamp"] = origin.timestamp
        trace.append(event)

    event_log.append(trace)

    write_xes(event_log, out)
37+
38+
39+
def generate_xes_from_created(
    created: list[Statement], mask: list = None, out="out/initial.xes"
):
    """Write an XES log containing one event per created statement.

    The event name is the statement's task name, or ``"UNKNOWN"`` when
    ``TaskIdentifier.get_task`` returns ``None``. Statements whose name is in
    ``mask`` are left out of the log.

    Args:
        created: Statements produced by the analysis.
        mask: Event names to exclude; ``None`` means ``["EmptyCommit"]``.
        out: Path of the XES file to write.
            NOTE(review): the default is the same path as the default of
            generate_xes_from_initial, which looks like a copy-paste; it is
            kept unchanged for backward compatibility.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if mask is None:
        mask = ["EmptyCommit"]

    event_log = EventLog()
    trace = Trace()

    for st in created:
        task = TaskIdentifier.get_task(st)
        clazz = task[0] if task is not None else "UNKNOWN"

        if clazz in mask:
            continue

        event = Event()
        event["concept:name"] = clazz
        event["time:timestamp"] = st.timestamp
        trace.append(event)

    event_log.append(trace)

    write_xes(event_log, out)

0 commit comments

Comments
 (0)