Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH : improvement csv processing #517

Closed
wants to merge 10 commits into from
16 changes: 11 additions & 5 deletions rocketpy/mathutils/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@

import matplotlib.pyplot as plt
import numpy as np
import csv
from scipy import integrate, linalg, optimize
from ..tools import data_preprocessing

try:
from functools import cached_property
Expand Down Expand Up @@ -206,10 +208,12 @@ def set_source(self, source):
if isinstance(source, (str, Path)):
with open(source, "r") as file:
try:
source = np.loadtxt(file, delimiter=",", dtype=float)
source = np.loadtxt(file, delimiter=",")
except ValueError:
# If an error occurs, headers are present
source = np.loadtxt(source, delimiter=",", dtype=float, skiprows=1)
source = np.loadtxt(
data_preprocessing(source), delimiter=",", dtype=np.float64
)
except Exception as e:
raise ValueError(
"The source file is not a valid csv or txt file."
Expand Down Expand Up @@ -2911,10 +2915,12 @@ def _check_user_input(
if isinstance(source, (str, Path)):
# Convert to numpy array
try:
source = np.loadtxt(source, delimiter=",", dtype=float)
source = np.loadtxt(source, delimiter=",", dtype=np.float64)
except ValueError:
# Skip header
source = np.loadtxt(source, delimiter=",", dtype=float, skiprows=1)
# If an error occurs, there is a header
source = np.loadtxt(
data_preprocessing(source), delimiter=",", dtype=np.float64
Gui-FernandesBR marked this conversation as resolved.
Show resolved Hide resolved
)
except Exception as e:
raise ValueError(
"The source file is not a valid csv or txt file."
Expand Down
100 changes: 100 additions & 0 deletions rocketpy/tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import importlib
import importlib.metadata
import re
import csv
from bisect import bisect_left

import pytz
Expand Down Expand Up @@ -381,6 +382,105 @@ def check_requirement_version(module_name, version):
return True


def is_float(element):
    """Tell whether an element can be converted to a float.

    Parameters
    ----------
    element : any
        The element to test.

    Returns
    -------
    result : bool
        True if ``float(element)`` succeeds, False otherwise.
    """
    try:
        float(element)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised by float() for integers too large to be
        # represented (e.g. 10**400); such a value is not convertible either.
        return False
    return True


def return_first_data(source):
    """Return the first value (first cell) found in a CSV file.

    The file is parsed with ``csv.reader`` so that the complete first field
    is returned (e.g. ``"-1.5"``), not merely the first character of the
    first line. Empty rows are skipped.

    Parameters
    ----------
    source : string
        The file path to the CSV file.

    Returns
    -------
    result : str or None
        The first value of the CSV file, or None if the file contains no
        value at all.
    """
    # newline="" is what the csv module recommends for files handed to
    # csv.reader, so that embedded newlines are interpreted correctly.
    with open(source, "r", newline="") as native_data:
        for row in csv.reader(native_data):
            for value in row:
                return value
    return None


def if_header(source):
    """Tell whether a CSV file starts with a header.

    The file is considered to have a header when the first value obtained
    from ``return_first_data`` cannot be converted to a float.

    Parameters
    ----------
    source : string
        The file path to the CSV file.

    Returns
    -------
    result : bool
        True if the CSV file has a header, False otherwise.
    """
    first_value = return_first_data(source)
    starts_with_number = is_float(first_value)
    return not starts_with_number


def data_preprocessing(source):
    """Clean a CSV file and write the numeric rows to a new CSV file.

    A header row (a first non-empty row that is not entirely numeric) is
    removed, as is every row containing a value that cannot be converted to
    a float (for instance empty cells). Blank lines are ignored. The cleaned
    rows are written to ``cleaned_data.csv`` in the current working
    directory.

    Parameters
    ----------
    source : string
        The file path to the CSV file.

    Returns
    -------
    output_path : string
        The path of the cleaned, header-free CSV file.

    Warns
    -----
    UserWarning
        If at least one data row was skipped because it contained a value
        that is not convertible to a float.
    """
    # Imported locally so that this function does not rely on a module-level
    # import that may be absent from the file.
    import warnings

    output_path = "cleaned_data.csv"

    # newline="" is what the csv module recommends for files it reads/writes.
    with open(source, "r", newline="") as file:
        # Blank lines produce empty rows; they carry no data, drop them now.
        rows = [row for row in csv.reader(file) if row]

    # Drop the header exactly once, and only if the first row really is one.
    # (Skipping a row unconditionally, or twice, silently loses valid data.)
    if rows and not all(is_float(value) for value in rows[0]):
        rows = rows[1:]

    data_no_headers = []
    skipped_rows = 0
    for row in rows:
        if all(is_float(value) for value in row):
            data_no_headers.append(row)
        else:
            skipped_rows += 1

    # Make it clear to the user that not everything in their file was used.
    if skipped_rows:
        warnings.warn(
            f"{skipped_rows} row(s) of '{source}' were skipped during "
            "preprocessing because they contain values that cannot be "
            "converted to float.",
            UserWarning,
            stacklevel=2,
        )

    # Save the processed data to a new CSV file
    with open(output_path, "w", encoding="utf-8", newline="") as output_file:
        writer = csv.writer(output_file, delimiter=",")
        writer.writerows(data_no_headers)

    return output_path


if __name__ == "__main__":
import doctest

Expand Down
Loading