Skip to content

Commit afbcc55

Browse files
Initial commit of PrismIO (#3)
* initialized basic project structure. Added basic reader functions and groupby functions of io_frame Co-authored-by: Abhinav Bhatele <[email protected]>
1 parent 0ff98e9 commit afbcc55

File tree

8 files changed

+273
-1
lines changed

8 files changed

+273
-1
lines changed

.flake8

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# -*- conf -*-
2+
# flake8 settings for prismio
3+
#
4+
# These are the flake8 settings recommended by Black
5+
# https://github.com/psf/black/blob/master/docs/the_black_code_style.md
6+
7+
[flake8]
8+
max-line-length = 80
9+
select = C,E,F,W,B,B950
10+
extend-ignore = E203, E501

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.pyc

.travis.yml

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
branches:
2+
only:
3+
- develop
4+
- /^releases\/.*$/
5+
6+
jobs:
7+
fast_finish: true
8+
include:
9+
- stage: "style check"
10+
python: '3.7'
11+
os: linux
12+
language: python
13+
script:
14+
- black --diff --check .
15+
- flake8
16+

LICENSE

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright 2021 Parallel Software and Systems Group, University of Maryland
1+
Copyright 2020-2021 Parallel Software and Systems Group, University of Maryland
22

33
Permission is hereby granted, free of charge, to any person obtaining a
44
copy of this software and associated documentation files (the "Software"),

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
PrismIO is a Python-based tool that takes in performance data produced by various profiling tools and structures it into Pandas dataframes. It is intended to provide a flexible and convenient API for users to analyze multiple runs.

prismio/io_frame.py

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Copyright 2020-2021 Parallel Software and Systems Group, University of
2+
# Maryland. See the top-level LICENSE file for details.
3+
#
4+
# SPDX-License-Identifier: MIT
5+
6+
"""
7+
The prismio.io_frame module provides the IOFrame class, a structured
8+
container with a flexible API for tracing/profiling data generated by
9+
Recorder or Darshan.
10+
"""
11+
12+
13+
import sys
14+
import os
15+
import numpy as np
16+
import pandas as pd
17+
18+
class IOFrame:
    """
    Main class of the prism application. It holds I/O performance data
    generated by I/O tools, organized as a pandas DataFrame, and provides
    API functions (filtering, group-by/aggregate) for analysis.

    Attributes:
        dataframe (DataFrame): the data held by this IOFrame.
    """

    def __init__(self, dataframe):
        """
        Args:
            dataframe (DataFrame): the dataframe this IOFrame should have.
                It is stored as-is (not copied).

        Return:
            None.

        """
        self.dataframe = dataframe

    @staticmethod
    def from_recorder(log_dir):
        """
        Read trace files from Recorder and create the corresponding
        IOFrame object.

        Args:
            log_dir (str): path to the Recorder trace files directory the
                user wants to analyze.

        Return:
            An IOFrame object corresponding to this trace files directory.

        """
        # Imported lazily: prismio.readers.recorder_reader imports IOFrame
        # from this module, so a top-level import would be circular.
        from prismio.readers.recorder_reader import RecorderReader

        return RecorderReader(log_dir).read()

    def filter(self, my_lambda):
        """
        Create a new IOFrame containing only the rows selected by the
        predicate the user provides.

        Args:
            my_lambda (function): row predicate. It is called once per row
                (via ``DataFrame.apply(..., axis=1)``) and must return a
                truthy value for rows to keep. For example,
                ``lambda row: row['rank'] == 0``.

        Return:
            A new IOFrame object whose dataframe holds the selected rows,
            re-indexed from 0. The dataframe of this IOFrame is not
            modified.

        """
        mask = self.dataframe.apply(my_lambda, axis=1)
        # drop=True discards the old index instead of inserting it as a
        # column. The previous reset_index()/drop('index') pair removed a
        # user column named 'index' and left a stray 'level_0' column
        # whenever the data already had an 'index' column.
        filtered = self.dataframe[mask].reset_index(drop=True)
        return IOFrame(filtered)

    def groupby_aggregate(self, groupby_columns, agg_function):
        """
        Return a dataframe after groupby and aggregate operations on the
        dataframe of this IOFrame.

        Args:
            groupby_columns (list of strings): the column names the user
                wants to group by.
            agg_function (function or string): the function used for
                aggregation. For example, 'sum', 'count', etc.

        Return:
            A dataframe (not an IOFrame) holding the aggregated groups.

        """
        return self.dataframe.groupby(groupby_columns).agg(agg_function)
84+

prismio/multi_io_frame.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright 2020-2021 Parallel Software and Systems Group, University of
2+
# Maryland. See the top-level LICENSE file for details.
3+
#
4+
# SPDX-License-Identifier: MIT
5+
6+
"""
7+
The prismio.multi_io_frame module aims to provide functionalities of analysing
8+
and comparing multiple runs.
9+
10+
"""
11+
12+
13+
import sys
14+
import glob
15+
import numpy as np
16+
import pandas as pd
17+
from prismio.io_frame import IOFrame
18+
19+
class MultiIOFrame:
    """
    Holds one IOFrame per Recorder tracing directory so that multiple runs
    can be analysed and compared.

    Attributes:
        ioframes (dict): maps each tracing directory path to the IOFrame
            created from it by IOFrame.from_recorder.
    """

    def __init__(self, directories):
        """
        Args:
            directories (list or str): a list (or tuple) of tracing
                directories, or a root directory that contains tracing
                directories. For a root directory every entry directly
                under it is treated as a tracing directory.

        Return:
            None.

        Raises:
            TypeError: if directories is neither a str nor a list/tuple.

        """
        if isinstance(directories, str):
            # glob returns entries in arbitrary order; sort so the frames
            # are always loaded and stored in the same order.
            directories = sorted(glob.glob(directories + "/*"))
        elif not isinstance(directories, (list, tuple)):
            # __init__ must not return a value (the old `return(-1)` itself
            # triggered an unrelated TypeError), so fail explicitly with
            # the intended message.
            raise TypeError(
                "please pass in a root directory or a list of tracing "
                "directories"
            )

        self.ioframes = {}
        for directory in directories:
            self.ioframes[directory] = IOFrame.from_recorder(directory)

prismio/readers/recorder_reader.py

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# Copyright 2020-2021 Parallel Software and Systems Group, University of
2+
# Maryland. See the top-level LICENSE file for details.
3+
#
4+
# SPDX-License-Identifier: MIT
5+
6+
"""
7+
The prismio.readers.recorder_reader module provides functions for processing tracing data
8+
from Recorder, for example, sorting records, finding the file name each record operates.
9+
After processing, it organizes the data into a dataframe and creates the IOFrame for
10+
recorder tracing files.
11+
12+
"""
13+
14+
15+
import sys
16+
import os
17+
from csv import writer
18+
import numpy as np
19+
import pandas as pd
20+
from prismio.io_frame import IOFrame
21+
import recorder_viz
22+
23+
class RecorderReader:
    """
    The reader class for recorder data. It can read in recorder trace files,
    preprocess the data, and create a corresponding IOFrame.
    """

    # Substrings of function names whose first argument is parsed as the
    # file descriptor. Only consulted after the fdopen / open /
    # fwrite / fread cases in read() have been ruled out.
    _FD_FIRST_ARG_KEYWORDS = (
        'seek',
        'close',
        'sync',
        'writev',
        'readv',
        'pwrite',
        'pread',
        'write',
        'read',
        'fprintf',
    )

    def __init__(self, log_dir):
        """
        Use the Recorder creader_wrapper to read in tracing data.

        Args:
            log_dir (string): path to the trace files directory of Recorder
                the user wants to analyze.

        Return:
            None.

        """
        self.reader = recorder_viz.RecorderReader(log_dir)

    def read(self):
        """
        Build an IOFrame from the records of every rank.

        For each rank the records are sorted by start time and then walked
        in that order while a per-rank map from file descriptor to file
        name is maintained, so that each record can be labelled with the
        file it operates on. One dataframe row is produced per record.

        Args:
            None.

        Return:
            An IOFrame created by trace files of recorder specified by the
            log_dir of this RecorderReader. Its dataframe has the columns
            rank, fid, name, tstart, tend, time (tend - tstart), arg_count,
            args, return_value and file. file is None for functions that
            match none of the recognised name patterns and '__unknown__'
            when the descriptor was never seen being opened.

        """
        reader = self.reader
        total_ranks = reader.GM.total_ranks

        records_as_dict = {
            'rank': [],
            'fid': [],
            'name': [],
            'tstart': [],
            'tend': [],
            'time': [],
            'arg_count': [],
            'args': [],
            'return_value': [],
            'file': [],
        }

        for rank in range(total_ranks):
            rank_records = reader.records[rank]
            per_rank_records = sorted(
                (
                    rank_records[record_index]
                    for record_index in range(reader.LMs[rank].total_records)
                ),
                key=lambda record: record.tstart,
            )

            # A fresh map for every rank. The previous `[{...}] * n`
            # construction made every rank share a single dict, so files
            # opened by one rank leaked into the lookups of all others.
            fd_to_filename = {0: "stdin", 1: "stdout", 2: "stderr"}

            for record in per_rank_records:
                function_args = record.args_to_strs()
                func_name = reader.funcs[record.func_id]

                # Order matters: 'fdopen' contains 'open', and
                # 'fwrite'/'fread' contain 'write'/'read'.
                if 'fdopen' in func_name:
                    # The new handle refers to the same file as the
                    # descriptor passed as the first argument.
                    old_fd = int(function_args[0])
                    filename = fd_to_filename.get(old_fd, '__unknown__')
                    fd_to_filename[record.res] = filename
                elif 'open' in func_name:
                    # Also covers 'fopen'. The return value is the new
                    # handle and the first argument is the file name.
                    filename = function_args[0]
                    fd_to_filename[record.res] = filename
                elif 'fwrite' in func_name or 'fread' in func_name:
                    # The handle is the fourth argument for these calls.
                    fd = int(function_args[3])
                    filename = fd_to_filename.get(fd, '__unknown__')
                elif any(
                    keyword in func_name
                    for keyword in self._FD_FIRST_ARG_KEYWORDS
                ):
                    fd = int(function_args[0])
                    filename = fd_to_filename.get(fd, '__unknown__')
                else:
                    filename = None

                records_as_dict['rank'].append(rank)
                records_as_dict['fid'].append(record.func_id)
                records_as_dict['name'].append(func_name)
                records_as_dict['tstart'].append(record.tstart)
                records_as_dict['tend'].append(record.tend)
                records_as_dict['time'].append(record.tend - record.tstart)
                records_as_dict['arg_count'].append(record.arg_count)
                records_as_dict['args'].append(function_args)
                records_as_dict['return_value'].append(record.res)
                records_as_dict['file'].append(filename)

        dataframe = pd.DataFrame.from_dict(records_as_dict)

        return IOFrame(dataframe)

0 commit comments

Comments
 (0)