1
1
import json
2
+ import GitToXApi .utils
2
3
from tincan import Statement
3
4
import GitToXApi .differential
4
5
from GitToXApi .utils import *
9
10
import debug
10
11
import shutil
11
12
import os
12
- import pm4py
13
- from pm4py .objects .log .obj import EventLog , Trace , Event
13
+ from xes_file import *
14
+ import argparse
15
+ import subprocess
14
16
15
17
16
18
def format_statement (st : Statement ) -> Statement :
@@ -45,9 +47,12 @@ def exec_modifier(statements: list[Statement], identifier: CodeModifier):
45
47
46
48
47
49
def dump (
48
- name : str , statements : list [Statement ], filter : Callable [[Statement ], bool ]
50
+ name : str ,
51
+ out : str ,
52
+ statements : list [Statement ],
53
+ filter : Callable [[Statement ], bool ],
49
54
) -> None :
50
- with open (" out/" + name + ".json" , "w" ) as f :
55
+ with open (out + name + ".json" , "w" ) as f :
51
56
f .write (
52
57
json .dumps (
53
58
[stmt .as_version () for stmt in statements if filter (stmt )],
@@ -56,18 +61,44 @@ def dump(
56
61
)
57
62
58
63
59
- if __name__ == "__main__" :
64
def generate_files(repo: Repo, out_folder, repo_name: str):
    """Create the xAPI and RefactoringMiner JSON files for a repository.

    Each output is only produced when its destination file does not exist
    yet, so results of a previous run are reused.

    Args:
        repo: Repository object; its ``git_dir`` attribute is used to find
            the directory handed to RefactoringMiner.
        out_folder: Prefix prepended as-is to every output path (callers
            are expected to include the trailing separator).
        repo_name: Base name of the outputs, ``<repo_name>.json`` and
            ``<repo_name>_refactoring.json``.
    """
    # Parent directory of the git dir. os.path.dirname, unlike
    # str.rindex("/"), does not raise when the path has no forward slash.
    repo_path = os.path.dirname(repo.git_dir) or "."
    print("Getting files for", repo_path)

    dest_xapi = out_folder + repo_name + ".json"
    if not os.path.exists(dest_xapi):
        print("Generation of xapi files for", repo_path)
        # Build the statements only when they are going to be written, and
        # before opening the file so a failure does not leave an empty file.
        stmts = GitToXApi.utils.generate_xapi(repo, {})
        serialized = GitToXApi.utils.serialize_statements(stmts, indent=2)
        with open(dest_xapi, "w") as f:
            f.write(serialized)

    dest_refactoring = out_folder + repo_name + "_refactoring.json"
    if not os.path.exists(dest_refactoring):
        print("Generation of refactoring files for", repo_path)
        # Argument list without a shell: paths containing spaces or shell
        # metacharacters are passed to the tool verbatim.
        command = [
            shutil.which("RefactoringMiner") or "RefactoringMiner",
            "-a",
            repo_path,
            "-json",
            dest_refactoring,
        ]
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                errors="replace",
            )
        except OSError as err:
            # Tool missing or not executable: report it and carry on, as the
            # original never aborted on a RefactoringMiner failure.
            print("RefactoringMiner could not be started:", err)
        else:
            if result.returncode != 0:
                print("RefactoringMiner failed for", repo_path)
                print(result.stderr)

    print("Files where collected for", repo_path)
85
+
86
+
87
+ def process_file (path : str , out : str ):
88
+
89
+ refactoring_file = path [: path .rfind ("." )] + "_refactoring.json"
60
90
61
91
initial_statements = None
62
- with open ("original.json" ) as f :
92
+ with open (path ) as f :
63
93
initial_statements = deserialize_statements (f )
94
+ initial_statements = [format_statement (s ) for s in initial_statements ]
64
95
initial_total = len (initial_statements )
65
96
statements = copy .deepcopy (initial_statements )
66
97
67
98
code_modifiers = [
68
99
PreciseVerbModifier (),
69
100
NotSourceTask (),
70
- RefactoringMinerTask (),
101
+ RefactoringMinerTask (refactoring_file ),
71
102
TrimEditionContentModifier (),
72
103
LineBreakAndSpacingChangeTask (),
73
104
CutPasteTask (),
@@ -97,9 +128,6 @@ def dump(
97
128
for modif in code_modifiers :
98
129
statements = exec_modifier (statements , modif )
99
130
100
- shutil .rmtree ("out" , ignore_errors = True )
101
- os .mkdir ("./out" )
102
-
103
131
scores = {"UNKNOWN" : 0 }
104
132
105
133
for st in statements :
@@ -117,16 +145,20 @@ def dump(
117
145
safe_name : str = k .lower ().replace (" " , "_" )
118
146
dump (
119
147
safe_name ,
148
+ out ,
120
149
statements ,
121
150
lambda x : "task" in x .context .extensions
122
151
and x .context .extensions ["task" ]["id" ] == k ,
123
152
)
124
153
dump (
125
154
"unknown" ,
155
+ out ,
126
156
statements ,
127
157
lambda x : not "task" in x .context .extensions ,
128
158
)
129
159
160
+ dump (path [: path .rfind ("." )] + "_processed.json" , out , statements , lambda x : True )
161
+
130
162
scores = [(k , scores [k ]) for k in scores ]
131
163
scores .sort (key = lambda v : - v [1 ])
132
164
@@ -136,54 +168,45 @@ def dump(
136
168
]
137
169
138
170
if debug .GENERATE_XES_FROM_INITIAL :
139
- keys = [st .object .id for st in initial_statements ]
140
-
141
- event_log = EventLog ()
142
- trace = Trace ()
143
-
144
- for key in keys :
145
- event = Event ()
146
- classes = set ()
147
- for st in statements :
148
- task = TaskIdentifier .get_task (st )
149
- if task == None or not "origins" in st .context .extensions :
150
- continue
151
- if not key in st .context .extensions ["origins" ]:
152
- continue
153
- classes .add (task [0 ])
154
- if len (classes ) == 0 :
155
- classes .add ("UNKNOWN" )
156
- classes = list (classes )
157
- classes .sort ()
158
- event ["concept:name" ] = "/" .join (classes )
159
- event ["org:resource" ] = st .object .definition .description ["en-US" ]
160
- event ["time:timestamp" ] = st .timestamp
161
- trace .append (event )
162
-
163
- event_log .append (trace )
164
-
165
- pm4py .write_xes (event_log , "out/initial.xes" )
171
+ generate_xes_from_initial (
172
+ initial_statements , statements , out = out + "initial.xes"
173
+ )
166
174
167
175
if debug .GENERATE_XES_FROM_CREATED :
176
+ generate_xes_from_created (statements , out = out + "created.xes" )
168
177
169
- event_log = EventLog ()
170
- trace = Trace ()
171
178
172
- for st in statements :
173
- event = Event ()
179
def normalize_cli_paths(filename: str, out_folder: str) -> tuple[str, str]:
    """Normalise the paths received on the command line.

    Args:
        filename: Input path; a single trailing ``/`` is removed.
        out_folder: Output prefix; a trailing ``/`` is appended unless the
            value is empty or already ends with one.

    Returns:
        The ``(filename, out_folder)`` pair after normalisation.
    """
    if out_folder and not out_folder.endswith("/"):
        out_folder += "/"
    # removesuffix is safe on an empty string, unlike indexing with [-1].
    return filename.removesuffix("/"), out_folder


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="GitXApiAnalysis",
        description="Program used to analyze git xapi files",
    )

    parser.add_argument("filename")
    parser.add_argument(
        "--generate", "-g", action=argparse.BooleanOptionalAction, default=False
    )
    parser.add_argument("-o", "--out", default="")

    args = parser.parse_args()

    filename, out_folder = normalize_cli_paths(args.filename, args.out)
    generate = args.generate

    if out_folder != "":
        # exist_ok avoids the separate existence check before creating.
        os.makedirs(out_folder, exist_ok=True)

    if not os.path.exists(filename):
        raise FileNotFoundError(filename)

    if generate:
        # With --generate, filename is opened as a repository and the raw
        # xAPI / refactoring files are produced.
        repo = Repo(filename)
        generate_files(repo, out_folder, filename)
    else:
        # Otherwise filename is an xAPI JSON file to analyse.
        process_file(filename, out_folder)
0 commit comments