-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfun_get_value.py
238 lines (190 loc) · 8.04 KB
/
fun_get_value.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import pandas as pd
import numpy as np
import plotly
import plotly.graph_objs as go
import plotly.offline as offline
'''
get one day's value with all type data from all reservoirs(if data is percentage : sting="percentage")
獲得單日資料,如果是百分比資料,則用string="percentage"去掉"%"
- parameter:
path(str): file folder
file_name(str): file name without year infomation(ex: "percentage-" or "In-Daily-")
date(str): assigned date(ex: "2019-4-7")
reservoir(bool): whether return reservoir list
string(str): data type in finding files("percentage", "floating_point")
'''
def a_day_value(path, file_name, date, reservoir, string):
df = pd.read_csv(path + file_name) # read file
df.index = df["Unnamed: 0"] # set date as DataFrame's index
try:
Capacity = list(df.loc[date][1:]) # get capacity / storage
except BaseException: # invalid date or other exception
print("Error! Can not search the data.")
return 0
if(string == "percentage"): # if data is percentage
Capacity = percent_to_num(Capacity)
if(reservoir):
reservoir_list = list(df.columns[1:]) # df.columns[0] is column name
return Capacity, reservoir_list
return Capacity
'''
read data and select every n(=days) days as a data point
讀入資料以每 n 天的平均值當作新的資料點,通常用於看整年每 n 天(n < 31)的資料趨勢
- parameter:
path(str): file folder
filename(str): file name without year infomation(ex: "percentage-" or "In-Daily-")
year(list[int]): assigned years
reservoir_name(str): a reservoir name
days(int): date period
string(str): data type in finding files("percentage", "floating_point")
'''
def n_days_value(path, filename, year, reservoir_name, days, string):
year_data = []
period = period_list(days,2015)
for yr in year:
past_data = []
file_name = path + filename + str(yr) + ".csv"
df = pd.read_csv(file_name)
df.index = list(df["Unnamed: 0"])
# # # scan for each day to get every n days data # # #
day = 1 # count for n days
while(day < df.shape[0]):
week_day, sum_data, total_day = 0, 0, 0
# # every n(=days) days, average values # #
isString = True
while(week_day != days and day < df.shape[0]):
data = df.iloc[day][reservoir_name] # date data
if(string == "percentage" and type(data) == str): # value is valid(ex: "XX.xx%")
data = data.replace("%", "")
elif(string != "percentage" and string != "floating_point"):
isString = False
print("string is a wrong assignment.")
return np.nan, np.nan
# if data = nan, skip it
sum_data += float(data)
total_day += 1
week_day += 1
day += 1
if(total_day): # total legal days
avg_data = sum_data / total_day
past_data.append(avg_data)
else:
past_data.append("NULL")
year_data.append(past_data)
return year_data, period
'''
get average month's value of assigned years
(if value is 5 times larger than average of past, print it out and skip adding it)
分別讀取每年每個月的進水量平均資料,如果該月平均水量大於過去平均值(判定為離群值)5倍,則不進行統計。
- parameter:
path(str): file folder
filename(str): file name without year infomation(ex: "percentage-" or "In-Daily-")
year(list[int]): list of assigned years
reservoir(list[str]): list of reservoirs' name
string(str): data type in finding files("percentage", "floating_point")
'''
def month_value(path, filename, year, reservoir, string):
data = []
sum_day = 31
sub_day = [0, -3, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0] # sum_day + sub_day = month's days
for i in range(len(reservoir)):
data.append([]) # create list for each reservoir
for yr in year: # scan for each year
file_name = path + filename + str(yr) + ".csv"
df = pd.read_csv(file_name)
df.index = list(df["Unnamed: 0"])
df[df == -999.9] = np.nan # ignore wrong data
df[df == 0] = np.nan
for num in range(len(reservoir)): # scan for each reservoir
days, count = 0, 0
while(count < 12):
next_days = days + sum_day + sub_day[count]
if(string == "percentage"):
value = percent_to_num(df[reservoir[num]][days: next_days]).mean()
else:
value = df[reservoir[num]][days: next_days].mean()
if(np.isnan(value)):
True
elif(len(data[num]) < count + 1): # has not been created month element
data[num].append(value)
else:
if(value / 5 > data[num][count] and year.index(yr) > 3 and data[num][count] > 100):
print(reservoir[num], ":", yr, "/", count + 1, "|", value, " | ", data[num][count], " | ", int(value / data[num][count]), " times")
else:
data[num][count] += value
data[num][count] /= 2 # get average
days = next_days
count += 1 # month count
return data
'''
total or average waterflow of each year
依據每年的總/平均水量計算
- parameter:
path(str): file folder
filename(str): file name without year infomation(ex: "percentage-" or "In-Daily-")
year(list[int]): list of assigned years
reservoir(str): list of reservoirs' name
string(str): data type in finding files("percentage", "floating_point")
'''
def year_value(path, file, reservoir_name, year_list, compute, string):
build, mean = False, []
for year in range(len(year_list)):
file_name = path + file + str(year_list[year]) + ".csv"
water = pd.read_csv(file_name)
water[water == 0] = np.nan
water[water == -999.9] = np.nan # it might be a wrong data
water2 = water[reservoir_name]
if(string == "percentage"):
water2 = pd.Series(percent_to_num(list(water2))) # convert to list and return to series
if(compute == "sum"):
mean.append(water2.sum(0)) # y direction
else:
mean.append(water2.mean(0))
return mean
def add_row_column(num, upper_bound):
if(num == upper_bound):
return 1
else:
return num + 1
'''
using days to compute date points of every period
根據 days(日期間隔) 和 year(年份),得到每段期間的日期點,作為作圖的 x 軸參考。
- parameter:
days(int): period of counting days
year(int): assigned year
'''
def period_list(days, year):
period = []
month_day, current, month, sum_day = 31, 0, 1, 31
sub_day = [0, -3, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0] # sum_day + sub_day = month's days
flag = False
while(1):
current += days
while(current > month_day):
current -= month_day
month += 1
if(month > 12): # over 365(/366)days
flag = True
break
if(year % 4 == 0 and month == 2):
month_day = 29
else:
month_day = 31 + sub_day[month - 1]
if(flag):
break
date = str(month) + "/" + str(current) # date format
period.append(date)
if(flag): # remaining days
period.append("12/31")
return period
'''
remove char % from each data, if data is nan, ignore
移除掉每筆資料上的 %
- parameter:
data(list[str]): list of data with "%"
'''
def percent_to_num(data):
for num in range(len(data)):
if(type(data[num]) == str): # if data is nan, type(data) is float
data[num] = float(data[num].replace("%", ""))
return data