-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlxquant.py
47 lines (40 loc) · 1.69 KB
/
lxquant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import sys
import pandas as pd
import numpy as np
def lxquant(input_file, istd_file):
    """Scale the numeric columns of an identification table by internal standards.

    Both files are read with ``pandas.read_csv``.

    * ``input_file`` must provide the columns ``SPECIE`` and ``CLASS`` plus
      numeric columns.
    * ``istd_file`` must provide the columns ``InternalStandard`` and
      ``LipidClass`` plus at least one numeric column.

    Rows of the input whose ``SPECIE`` equals an ``InternalStandard`` entry are
    the measured standards.  For each of them a per-column factor is computed as
    ``last numeric column / numeric column`` over the merged table (the last
    numeric column is presumably the known amount of the standard -- confirm
    against the istd file layout).  The first numeric column is left out of the
    scaling (the mass column, according to the original author's note).

    Every input row whose ``CLASS`` equals the standard's ``LipidClass`` is then
    multiplied, column by column, by those factors.  Columns without a factor
    are copied through unchanged.

    Args:
        input_file: Path (or buffer) of the identification CSV.
        istd_file: Path (or buffer) of the internal-standard CSV.

    Returns:
        A ``DataFrame`` with the same columns, in the same order, as the input
        table.  It holds one group of scaled rows per matched internal
        standard, concatenated in merge order.  When no internal standard is
        found in the input, an empty frame with the input's columns is
        returned.
    """
    internal_standard_col = "InternalStandard"
    species_col = "SPECIE"

    df1 = pd.read_csv(input_file)
    df_istd = pd.read_csv(istd_file)

    # Inner join: keeps only the input rows that are themselves a standard.
    df1_istd = df1.merge(
        df_istd, left_on=species_col, right_on=internal_standard_col
    )
    if df1_istd.empty:
        # Nothing to scale with; pd.concat([]) would raise ValueError.
        return df1.iloc[0:0].copy()

    numeric = df1_istd.select_dtypes(include=np.number)

    # Vectorised "last column / every column from position 1 on".
    # Position 0 is skipped to drop the mass column.  Explicit .iloc avoids the
    # deprecated positional fallback of Series.__getitem__ (x[-1]).
    ratios = numeric.iloc[:, 1:].rdiv(numeric.iloc[:, -1], axis=0)

    # Columns of the input that have a factor, kept in the input's own order.
    # A list (not a set) is required: pandas >= 2.0 rejects set indexers, and
    # a list keeps the output column order deterministic.
    scaled_cols = [col for col in df1.columns if col in ratios.columns]

    dfs = []
    for lipid_class, (_, factors) in zip(
        df1_istd["LipidClass"], ratios.iterrows()
    ):
        # The identifications that belong to this standard's class; copy so the
        # column assignments below never write into a view of df1.
        quantified = df1.loc[df1["CLASS"] == lipid_class].copy()
        for col in scaled_cols:
            quantified[col] = quantified[col] * factors[col]
        dfs.append(quantified)

    return pd.concat(dfs)
if __name__ == "__main__":
    # Command line: <script> <input_csv> <istd_csv>
    input_file, istd_file = sys.argv[1], sys.argv[2]
    # The result is written beside the input, with ".quant.csv" appended to its path.
    lxquant(input_file, istd_file).to_csv(f"{input_file}.quant.csv")