|
| 1 | +# Copyright 2020-2021 Parallel Software and Systems Group, University of |
| 2 | +# Maryland. See the top-level LICENSE file for details. |
| 3 | +# |
| 4 | +# SPDX-License-Identifier: MIT |
| 5 | + |
| 6 | +""" |
| 7 | +The prismio.reader.recorder_reader module provides functions for processing tracing data |
| 8 | +from Recorder, for example, sorting records and finding the file name each record operates on. |
| 9 | +After processing, it organizes the data into a dataframe and creates the IOFrame for |
| 10 | +Recorder tracing files. |
| 11 | +
|
| 12 | +""" |
| 13 | + |
| 14 | + |
| 15 | +import sys |
| 16 | +import os |
| 17 | +from csv import writer |
| 18 | +import numpy as np |
| 19 | +import pandas as pd |
| 20 | +from prismio.io_frame import IOFrame |
| 21 | +import recorder_viz |
| 22 | + |
class RecorderReader:
    """
    The reader class for recorder data. It can read in recorder trace files,
    preprocess the data, and create a corresponding IOFrame.
    """

    # File name recorded when a call refers to a descriptor that has no
    # known mapping on the current rank.
    _UNKNOWN_FILE = '__unknown__'

    # Substrings of function names whose first argument is looked up as a
    # file descriptor. 'write' and 'read' also cover 'pwrite', 'writev',
    # 'pread' and 'readv'.
    # NOTE(review): matching is by substring, so any traced function whose
    # name merely contains one of these tokens is treated the same way and
    # int() will raise ValueError if its first argument is not an integer
    # string -- confirm against the set of functions Recorder traces.
    _FD_FIRST_ARG_TOKENS = ('seek', 'close', 'sync', 'write', 'read', 'fprintf')

    def __init__(self, log_dir):
        """
        Use the Recorder creader_wrapper to read in tracing data.

        Args:
            log_dir (string): path to the trace files directory of Recorder the user wants to analyze.

        Return:
            None.

        """
        self.reader = recorder_viz.RecorderReader(log_dir)

    def read(self):
        """
        Sort the records of every rank by start time, resolve the file name
        each record operates on, collect everything into a dataframe, and
        wrap that dataframe in an IOFrame.

        Args:
            None.

        Return:
            An IOFrame created by trace files of recorder specified by the log_dir of this RecorderReader.

        """
        return IOFrame(self._build_dataframe())

    def _sorted_records(self, rank):
        """
        Return the records of one rank ordered by their start timestamp.

        Args:
            rank (int): index of the rank whose records are collected.

        Return:
            A list with the first ``LMs[rank].total_records`` records of
            that rank, sorted by ``tstart``.

        """
        rank_records = self.reader.records[rank]
        record_count = self.reader.LMs[rank].total_records
        # Index explicitly up to total_records instead of iterating the
        # container, exactly as the trace metadata dictates.
        return sorted(
            (rank_records[index] for index in range(record_count)),
            key=lambda record: record.tstart,
        )

    def _resolve_filename(self, func_name, function_args, result, fd_to_filename):
        """
        Determine the file name a single record operates on and update the
        rank-local descriptor table for calls that create descriptors.

        Args:
            func_name (string): name of the traced function.
            function_args (list): the record's arguments as strings.
            result: the record's return value (used as the new descriptor
                for fdopen/open style calls).
            fd_to_filename (dict): descriptor -> file name table of the
                rank the record belongs to. Mutated in place.

        Return:
            The file name, '__unknown__' if the descriptor has no mapping,
            or None if the function name matches none of the known patterns.

        """
        # 'fdopen' must be tested before 'open' because it contains it.
        if 'fdopen' in func_name:
            filename = fd_to_filename.get(int(function_args[0]), self._UNKNOWN_FILE)
            fd_to_filename[result] = filename
            return filename

        # Also matches 'fopen' (and any other name containing 'open').
        if 'open' in func_name:
            filename = function_args[0]
            fd_to_filename[result] = filename
            return filename

        # For fwrite/fread the stream is the fourth argument.
        if 'fwrite' in func_name or 'fread' in func_name:
            return fd_to_filename.get(int(function_args[3]), self._UNKNOWN_FILE)

        if any(token in func_name for token in self._FD_FIRST_ARG_TOKENS):
            return fd_to_filename.get(int(function_args[0]), self._UNKNOWN_FILE)

        return None

    def _build_dataframe(self):
        """
        Build the pandas DataFrame holding one row per record of every rank.

        Return:
            A DataFrame with the columns rank, fid, name, tstart, tend, time,
            arg_count, args, return_value and file. Rows are grouped by rank
            and ordered by start time within each rank.

        """
        records_as_dict = {
            'rank': [],
            'fid': [],
            'name': [],
            'tstart': [],
            'tend': [],
            'time': [],
            'arg_count': [],
            'args': [],
            'return_value': [],
            'file': [],
        }

        for rank in range(self.reader.GM.total_ranks):
            # Every rank gets its OWN descriptor table. Building the tables
            # with ``[{...}] * total_ranks`` would alias one dict across all
            # ranks, leaking descriptors opened on one rank into the others.
            fd_to_filename = {0: "stdin", 1: "stdout", 2: "stderr"}

            for record in self._sorted_records(rank):
                function_args = record.args_to_strs()
                func_name = self.reader.funcs[record.func_id]
                filename = self._resolve_filename(
                    func_name, function_args, record.res, fd_to_filename
                )

                records_as_dict['rank'].append(rank)
                records_as_dict['fid'].append(record.func_id)
                records_as_dict['name'].append(func_name)
                records_as_dict['tstart'].append(record.tstart)
                records_as_dict['tend'].append(record.tend)
                records_as_dict['time'].append(record.tend - record.tstart)
                records_as_dict['arg_count'].append(record.arg_count)
                records_as_dict['args'].append(function_args)
                records_as_dict['return_value'].append(record.res)
                records_as_dict['file'].append(filename)

        return pd.DataFrame.from_dict(records_as_dict)
0 commit comments