Skip to content

Commit 5ad0064

Browse files
Add files via upload
1 parent e1e1cc8 commit 5ad0064

File tree

3 files changed

+444
-0
lines changed

3 files changed

+444
-0
lines changed

fun_get_value.py

+237
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
import pandas as pd
2+
import numpy as np
3+
4+
import plotly
5+
import plotly.graph_objs as go
6+
import plotly.offline as offline
7+
8+
'''
9+
get one day's values of every data type from all reservoirs (if the data are percentages, pass string="percentage")
10+
獲得單日資料,如果是百分比資料,則用string="percentage"去掉"%"
11+
12+
- parameter:
13+
path(str): file folder
14+
file_name(str): full file name, appended to path as-is (ex: "percentage-2019.csv")
15+
date(str): assigned date(ex: "2019-4-7")
16+
reservoir(bool): whether return reservoir list
17+
string(str): data type in finding files("percentage", "floating_point")
18+
'''
19+
def a_day_value(path, file_name, date, reservoir, string):
    """Return one day's values for every reservoir column of a CSV file.

    Parameters:
        path (str): folder prefix; concatenated directly with ``file_name``.
        file_name (str): CSV file name, appended to ``path`` as-is.
        date (str): row label looked up in the "Unnamed: 0" column
            (ex: "2019-4-7").
        reservoir (bool): when true, also return the reservoir column names.
        string (str): "percentage" strips the "%" sign from the values via
            ``percent_to_num``; any other value leaves them untouched.

    Returns:
        list: the day's values, one per reservoir column; or
        tuple(list, list): ``(values, reservoir_names)`` when ``reservoir``
        is true; or
        int: ``0`` when the date cannot be found (a message is printed).
    """
    df = pd.read_csv(path + file_name)
    df.index = df["Unnamed: 0"]  # use the date column as the row index

    try:
        # Position 0 of the row is the "Unnamed: 0" label itself, so skip it.
        capacity = list(df.loc[date].iloc[1:])
    except (LookupError, TypeError):
        # Only lookup failures are handled here; KeyboardInterrupt and
        # SystemExit are deliberately allowed to propagate.
        print("Error! Can not search the data.")
        return 0

    if string == "percentage":
        capacity = percent_to_num(capacity)

    if reservoir:
        # df.columns[0] is the "Unnamed: 0" label column, not a reservoir.
        return capacity, list(df.columns[1:])

    return capacity
39+
40+
'''
41+
read data and select every n(=days) days as a data point
42+
讀入資料以每 n 天的平均值當作新的資料點,通常用於看整年每 n 天(n < 31)的資料趨勢
43+
44+
- parameter:
45+
path(str): file folder
46+
filename(str): file name without year information(ex: "percentage-" or "In-Daily-")
47+
year(list[int]): assigned years
48+
reservoir_name(str): a reservoir name
49+
days(int): date period
50+
string(str): data type in finding files("percentage", "floating_point")
51+
'''
52+
def n_days_value(path, filename, year, reservoir_name, days, string):
    """Average one reservoir's daily values over consecutive n-day periods.

    For every year the file ``path + filename + str(year) + ".csv"`` is read
    and its rows are grouped, from the first row onward, into chunks of
    ``days`` rows (the last chunk may be shorter). Each chunk contributes the
    mean of its non-NaN values, or the string "NULL" when it has none.

    Parameters:
        path (str): folder prefix.
        filename (str): file name without the year (ex: "percentage-").
        year (list[int]): years to read.
        reservoir_name (str): column to read from each file.
        days (int): length of one period in rows; must be >= 1.
        string (str): "percentage" (values may carry a trailing "%") or
            "floating_point".

    Returns:
        tuple(list[list], list[str]): one list of period averages per year,
        and the period end labels from ``period_list(days, 2015)``.
        ``(nan, nan)`` when ``string`` is not one of the two accepted values
        (a message is printed).

    Raises:
        ValueError: if ``days`` is smaller than 1 (the period would never
            advance).
    """
    # Validate up front so a bad argument is reported before any file I/O.
    if string not in ("percentage", "floating_point"):
        print("string is a wrong assignment.")
        return np.nan, np.nan
    if days < 1:
        raise ValueError("days must be a positive integer")

    # NOTE: the labels are always built for 2015, i.e. a non-leap year.
    period = period_list(days, 2015)

    year_data = []
    for yr in year:
        df = pd.read_csv(path + filename + str(yr) + ".csv")
        df.index = list(df["Unnamed: 0"])
        column = df[reservoir_name]

        past_data = []
        # Start at row 0 so the first day is included and the number of
        # averages lines up with the labels produced by period_list.
        for start in range(0, len(column), days):
            total, valid_days = 0.0, 0
            for raw in column.iloc[start:start + days]:
                if string == "percentage" and isinstance(raw, str):
                    raw = raw.replace("%", "")  # "XX.xx%" -> "XX.xx"
                number = float(raw)
                if np.isnan(number):
                    continue  # missing day: leave it out of the average
                total += number
                valid_days += 1
            past_data.append(total / valid_days if valid_days else "NULL")

        year_data.append(past_data)

    return year_data, period
96+
97+
'''
98+
get average month's value of assigned years
99+
(if value is 5 times larger than average of past, print it out and skip adding it)
100+
分別讀取每年每個月的進水量平均資料,如果該月平均水量大於過去平均值(判定為離群值)5倍,則不進行統計。
101+
102+
- parameter:
103+
path(str): file folder
104+
filename(str): file name without year information(ex: "percentage-" or "In-Daily-")
105+
year(list[int]): list of assigned years
106+
reservoir(list[str]): list of reservoirs' name
107+
string(str): data type in finding files("percentage", "floating_point")
108+
'''
109+
def month_value(path, filename, year, reservoir, string):
    """Average each calendar month's daily values over the given years.

    For every year the file ``path + filename + str(year) + ".csv"`` is read,
    -999.9 entries are treated as missing, and each reservoir column is cut
    into twelve consecutive row ranges of 31, 28, 31, ... rows. The mean of a
    range is that month's value for the year. Monthly values are then averaged
    across years, except that a value more than 5 times the average collected
    so far is printed and left out when the year is at position 4 or later in
    ``year`` and that average exceeds 100.

    Parameters:
        path (str): folder prefix.
        filename (str): file name without the year (ex: "In-Daily-").
        year (list[int]): years to read, in processing order.
        reservoir (list[str]): reservoir column names.
        string (str): "percentage" strips "%" via ``percent_to_num``; any
            other value uses the column as-is.

    Returns:
        list[list[float]]: for each reservoir, twelve monthly averages
        (index 0 = first month). A month with no valid data in any year is
        ``nan`` so that the remaining months keep their positions.
    """
    # NOTE(review): February is fixed at 28 rows; if leap-year files hold
    # 366 rows, months after February are offset by one day - confirm.
    month_lengths = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

    # Keep a sum and a count per month so the result is a true mean over
    # years and an all-NaN month cannot shift later months into its slot.
    totals = [[0.0] * 12 for _ in reservoir]
    counts = [[0] * 12 for _ in reservoir]

    for year_pos, yr in enumerate(year):
        df = pd.read_csv(path + filename + str(yr) + ".csv")
        df.index = list(df["Unnamed: 0"])
        df[df == -999.9] = np.nan  # ignore wrong data

        for num, reservoir_name in enumerate(reservoir):
            column = df[reservoir_name]
            start = 0
            for count, length in enumerate(month_lengths):
                chunk = column.iloc[start:start + length]
                start += length

                if string == "percentage":
                    # Convert a copy so the frame itself is not modified.
                    chunk = percent_to_num(chunk.copy())
                value = chunk.mean()

                if np.isnan(value):
                    continue  # no usable data for this month in this year

                seen = counts[num][count]
                if seen:
                    average = totals[num][count] / seen
                    if value / 5 > average and year_pos > 3 and average > 100:
                        # Outlier: report it and keep it out of the average.
                        print(reservoir[num], ":", yr, "/", count, "|", value, " | ", average, " | ", int(value / average), " times")
                        continue

                totals[num][count] += value
                counts[num][count] += 1

    return [
        [total / seen if seen else np.nan for total, seen in zip(total_row, count_row)]
        for total_row, count_row in zip(totals, counts)
    ]
150+
151+
152+
'''
153+
total or average waterflow of each year
154+
依據每年的總/平均水量計算
155+
156+
- parameter:
157+
path(str): file folder
158+
file(str): file name without year information(ex: "percentage-" or "In-Daily-")
159+
year_list(list[int]): list of assigned years
160+
reservoir_name(str): a reservoir name
161+
compute(str): "sum" for each year's total, any other value for each year's mean
string(str): data type in finding files("percentage", "floating_point")
162+
'''
163+
def year_value(path, file, reservoir_name, year_list, compute, string):
    """Return one total or mean per year for a reservoir column.

    Parameters:
        path (str): folder prefix.
        file (str): file name without the year (ex: "In-Daily-").
        reservoir_name: column selected from each yearly file.
        year_list (list[int]): years to read; one result per entry.
        compute (str): "sum" for the yearly total, anything else for the mean.
        string (str): "percentage" strips "%" via ``percent_to_num`` first.

    Returns:
        list: the yearly totals or means, in ``year_list`` order.
    """
    results = []
    for yr in year_list:
        table = pd.read_csv(path + file + str(yr) + ".csv")

        # Zeros and the -999.9 marker are both treated as missing values.
        table[table == 0] = np.nan
        table[table == -999.9] = np.nan

        selected = table[reservoir_name]
        if string == "percentage":
            # Convert through a plain list, then wrap it back into a Series.
            selected = pd.Series(percent_to_num(list(selected)))

        aggregate = selected.sum if compute == "sum" else selected.mean
        results.append(aggregate(0))  # axis 0: down the rows

    return results
183+
184+
def add_row_column(num, upper_bound):
    """Return the position after ``num``, wrapping to 1 once ``upper_bound`` is reached."""
    return 1 if num == upper_bound else num + 1
189+
190+
'''
191+
using days to compute date points of every period
192+
根據 days(日期間隔) 和 year(年份),得到每段期間的日期點,作為作圖的 x 軸參考。
193+
194+
- parameter:
195+
days(int): period of counting days
196+
year(int): assigned year
197+
'''
198+
def period_list(days, year):
    """Build "month/day" labels for the end of every ``days``-day period.

    Starting from the beginning of ``year``, the calendar is advanced
    ``days`` days at a time and each landing date is recorded. When the next
    step would leave the year, a final "12/31" label closes the last, shorter
    period - unless the previous period already ended exactly on 12/31.

    Parameters:
        days (int): period length in days; must be >= 1.
        year (int): calendar year, used for the month lengths (leap years).

    Returns:
        list[str]: labels such as "1/7", "1/14", ..., "12/31".

    Raises:
        ValueError: if ``days`` is smaller than 1 (the date would never
            advance).
    """
    import calendar

    if days < 1:
        raise ValueError("days must be a positive integer")

    period = []
    month, current = 1, 0
    month_day = calendar.monthrange(year, month)[1]

    while True:
        current += days
        # Roll over as many month boundaries as this step crosses.
        while current > month_day:
            current -= month_day
            month += 1
            if month > 12:
                # Past the end of the year: close the remaining days once.
                if not period or period[-1] != "12/31":
                    period.append("12/31")
                return period
            month_day = calendar.monthrange(year, month)[1]
        period.append(str(month) + "/" + str(current))
224+
225+
'''
226+
remove char % from each data, if data is nan, ignore
227+
移除掉每筆資料上的 %
228+
229+
- parameter:
230+
data(list[str]): list of data with "%"
231+
'''
232+
def percent_to_num(data):
    """Convert "XX.xx%" strings in ``data`` to floats, in place.

    Entries that are not strings (for example NaN, which is a float) are left
    unchanged.

    Parameters:
        data (list or pandas.Series): values that may carry a trailing "%".

    Returns:
        The same ``data`` object, after modification.
    """
    # A pandas Series indexes by label with []; go through .iloc so that
    # positions are used whatever the Series' index looks like.
    cells = data.iloc if hasattr(data, "iloc") else data

    for position in range(len(data)):
        value = cells[position]
        if isinstance(value, str):
            cells[position] = float(value.replace("%", ""))
    return data

fun_ploting.py

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import numpy as np
2+
import pandas as pd
3+
import csv
4+
5+
import plotly
6+
import plotly.graph_objs as go
7+
import plotly.offline as offline
8+
from plotly import tools # for subplot
9+
10+
from fun_get_value import add_row_column
11+
12+
'''
13+
Bar Graph 長條圖
14+
15+
- parameter:
16+
x(list): list of x value
17+
y(list): list of y value
18+
name(list[str]): each data's name
19+
title(str): graph's title
20+
xaxis(str): name of x-axis
21+
yaxis(str): name of y-axis
22+
file_name(str): file name
23+
'''
24+
def bar_plot(x, y, name, title, xaxis, yaxis, file_name):
    """Draw one bar trace per entry of ``name`` and write the chart as HTML.

    Parameters:
        x (list): x values, one sequence per trace.
        y (list): y values, one sequence per trace.
        name (list[str]): trace names; its length sets the number of traces.
        title (str): chart title.
        xaxis (str): x-axis title.
        yaxis (str): y-axis title.
        file_name (str): output file name; ".html" is appended.
    """
    traces = [
        go.Bar(x=x[k], y=y[k], name=label)
        for k, label in enumerate(name)
    ]
    figure = go.Figure(
        data=traces,
        layout=go.Layout(
            title=title,
            xaxis={"title": xaxis},
            yaxis={"title": yaxis},
        ),
    )
    offline.plot(figure, filename=file_name + ".html")  # output: HTML file
33+
34+
35+
'''
36+
Line Graph 折線圖
37+
38+
- parameter:
39+
x(list): list of x value
40+
y(list): list of y value
41+
name(list[str]): each data's name
42+
title(str): graph's title
43+
xaxis(str): name of x-axis
44+
yaxis(str): name of y-axis
45+
file_name(str): file name
46+
'''
47+
def line_plot(x, y, name, title, xaxis, yaxis, file_name):
    """Draw one line trace per entry of ``name`` and write the chart as HTML.

    Parameters:
        x (list): x values, one sequence per trace.
        y (list): y values, one sequence per trace.
        name (list[str]): trace names; its length sets the number of traces.
        title (str): chart title.
        xaxis (str): x-axis title.
        yaxis (str): y-axis title.
        file_name (str): output file name; ".html" is appended.
    """
    # All lines share a single figure.
    traces = [
        go.Scatter(x=x[k], y=y[k], mode="lines", name=label)
        for k, label in enumerate(name)
    ]
    figure = go.Figure(
        data=traces,
        layout=go.Layout(
            title=title,
            xaxis={"title": xaxis},
            yaxis={"title": yaxis},
        ),
    )
    offline.plot(figure, filename=file_name + ".html")  # output: HTML file
57+
58+
'''
59+
Subplots 多圖呈現(長條圖)
60+
根據送進來的資料,依照順序(由左到右、由上到下)輸出多圖,可供不同水庫間的資料比較。
61+
62+
- parameter:
63+
x(list): list of x value
64+
y(list): list of y value
65+
name(list[str]): each data's name
66+
title(str): graph's title
67+
xaxis(list[str]): list of name of x-axis
68+
yaxis(list[str]): list of name of y-axis
69+
file_name(str): file name
70+
row(int): number of subplot rows
71+
column(int): number of subplot columns
72+
'''
73+
def sub_plot(x, y, name, title, xaxis, yaxis, file_name, row, column):
    """Draw one bar chart per entry of ``name`` on a grid and write it as HTML.

    Subplots are filled left to right, then top to bottom; after the last
    cell the position wraps back to the first one.

    Parameters:
        x (list): x values, shared by every subplot.
        y (list): y values, one sequence per subplot.
        name (list[str]): subplot titles and trace names.
        title (str): title of the whole figure.
        xaxis (list[str]): x-axis title of each subplot.
        yaxis (list[str]): y-axis title of each subplot.
        file_name (str): output file name; ".html" is appended.
        row (int): number of subplot rows.
        column (int): number of subplot columns.
    """
    fig = tools.make_subplots(rows=row, cols=column, subplot_titles=name)

    grid_row, grid_col = 1, 1
    for idx, label in enumerate(name):
        fig.append_trace(go.Bar(x=x, y=y[idx], name=label), grid_row, grid_col)

        # Axes are numbered row by row, starting from 1.
        axis_id = str(column * (grid_row - 1) + grid_col)
        fig['layout']['xaxis' + axis_id].update(title=xaxis[idx])
        fig['layout']['yaxis' + axis_id].update(title=yaxis[idx])

        # Move one cell right; at the end of a row, drop to the next row.
        if grid_col == column:
            grid_row = add_row_column(grid_row, row)
        grid_col = add_row_column(grid_col, column)

    fig['layout'].update(title=title)
    offline.plot(fig, filename=file_name + ".html")
90+
91+

0 commit comments

Comments
 (0)