1
+ import pandas as pd
2
+ import numpy as np
3
+
4
+ import plotly
5
+ import plotly .graph_objs as go
6
+ import plotly .offline as offline
7
+
8
def a_day_value(path, file_name, date, reservoir, string):
    """Return one day's values for every reservoir stored in a CSV file.

    The file is read from ``path + file_name``. Its "Unnamed: 0" column is
    used as the row index and ``date`` is looked up in that index.

    Parameters:
        path (str): folder prefix, concatenated directly in front of file_name.
        file_name (str): name of the CSV file to read.
        date (str): row label to look up (ex: "2019-4-7").
        reservoir (bool): when true, also return the reservoir (column) names.
        string (str): data type stored in the file. With "percentage" the
            values go through percent_to_num (the "%" is removed and the
            text becomes a float); any other value returns the row as read.

    Returns:
        list: the values of the row without its first (date) column, or
        tuple(list, list): (values, reservoir names) when reservoir is true, or
        int: 0 when the row can not be looked up (a message is printed).
    """
    df = pd.read_csv(path + file_name)
    df.index = df["Unnamed: 0"]  # the first CSV column holds the row labels

    try:
        # Position 0 of the row is still the "Unnamed: 0" cell, so drop it.
        capacity = list(df.loc[date].iloc[1:])
    except Exception:
        # Unknown date or malformed row. Unlike BaseException this no longer
        # swallows KeyboardInterrupt / SystemExit.
        print("Error! Can not search the data.")
        return 0

    if string == "percentage":
        capacity = percent_to_num(capacity)

    if reservoir:
        reservoir_list = list(df.columns[1:])  # columns[0] is the date column
        return capacity, reservoir_list

    return capacity
39
+
40
def n_days_value(path, filename, year, reservoir_name, days, string):
    """Average one reservoir's daily values over consecutive n-day periods.

    For every year in ``year`` the file ``path + filename + str(yr) + ".csv"``
    is read and the column ``reservoir_name`` is cut into chunks of ``days``
    rows; each chunk contributes the mean of its valid values. Usually used
    to look at the trend of a whole year in steps of n days (n < 31).

    Parameters:
        path (str): folder prefix of the data files.
        filename (str): file name without the year part
            (ex: "percentage-" or "In-Daily-").
        year (list[int]): years to read.
        reservoir_name (str): column (reservoir) to read.
        days (int): length of one period in rows; must be at least 1.
        string (str): data type in the files, "percentage" or
            "floating_point". With "percentage" the "%" is stripped from
            text values before conversion.

    Returns:
        tuple(list[list], list[str]): one list of period averages per year
        and the period labels from ``period_list(days, 2015)``. A period
        without any valid value is stored as the string "NULL".
        (numpy.nan, numpy.nan) is returned, after printing a message, when
        ``string`` is not one of the two supported values.

    Raises:
        ValueError: if ``days`` is smaller than 1 (this used to loop forever).
    """
    if days < 1:
        raise ValueError("days must be a positive integer.")

    period = period_list(days, 2015)

    if string not in ("percentage", "floating_point"):
        print("string is a wrong assignment.")
        return np.nan, np.nan

    year_data = []
    for yr in year:
        df = pd.read_csv(path + filename + str(yr) + ".csv")
        column = df[reservoir_name]
        past_data = []

        # NOTE(review): the scan starts at row index 1, exactly as before, so
        # the first row of every file is never averaged, while the labels of
        # period_list count from January 1st. Confirm whether row 0 should
        # be part of the first period.
        for start in range(1, df.shape[0], days):
            sum_data, total_day = 0.0, 0

            for data in column.iloc[start:start + days]:
                if string == "percentage" and isinstance(data, str):
                    data = data.replace("%", "")  # "XX.xx%" -> "XX.xx"

                value = float(data)
                if np.isnan(value):
                    # Missing day: leave it out of both the sum and the count
                    # so it can not turn the whole period into NaN.
                    continue

                sum_data += value
                total_day += 1

            if total_day:  # at least one valid day in this period
                past_data.append(sum_data / total_day)
            else:
                past_data.append("NULL")

        year_data.append(past_data)

    return year_data, period
96
+
97
def month_value(path, filename, year, reservoir, string):
    """Return the mean monthly value of every reservoir over several years.

    For every year the file ``path + filename + str(yr) + ".csv"`` is read,
    -999.9 entries are treated as missing, and the rows are cut into twelve
    consecutive month-sized slices (February has 29 rows in leap years). The
    mean of each slice is that year's value for the month, and the values of
    all years are averaged per month.

    A yearly value is treated as an outlier, printed and left out, when it is
    more than 5 times the mean collected so far, that mean is above 100 and
    the year sits at position 4 or later in ``year``.

    Parameters:
        path (str): folder prefix of the data files.
        filename (str): file name without the year part
            (ex: "percentage-" or "In-Daily-").
        year (list[int]): years to read.
        reservoir (list[str]): column (reservoir) names to evaluate.
        string (str): data type in the files; "percentage" converts
            "XX.xx%" text through percent_to_num, anything else uses the
            column as read.

    Returns:
        list[list[float]]: one list per reservoir holding 12 monthly means
        (index 0 is January). A month without any valid data is numpy.nan,
        so every month keeps its own position.
    """
    import calendar  # local import: only needed for the leap-year rule

    sums = [[0.0] * 12 for _ in reservoir]
    counts = [[0] * 12 for _ in reservoir]

    for year_pos, yr in enumerate(year):
        df = pd.read_csv(path + filename + str(yr) + ".csv")

        # NOTE(review): assumes the file of a leap year contains a row for
        # February 29th, as period_list does - confirm against the data.
        february = 29 if calendar.isleap(int(yr)) else 28
        month_lengths = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

        for num, name in enumerate(reservoir):
            column = df[name]
            column = column.mask(column == -999.9)  # ignore wrong data

            first_row = 0
            for month, length in enumerate(month_lengths):
                chunk = column.iloc[first_row:first_row + length]
                first_row += length

                if string == "percentage":
                    # Convert a plain list so the DataFrame is never modified.
                    chunk = pd.Series(percent_to_num(list(chunk)), dtype=float)

                value = chunk.mean()
                if np.isnan(value):
                    continue  # no valid data for this month in this year

                if counts[num][month]:
                    past_mean = sums[num][month] / counts[num][month]
                    if value / 5 > past_mean and year_pos > 3 and past_mean > 100:
                        print(name, ":", yr, "/", month, "|", value, " | ", past_mean, " | ", int(value / past_mean), " times")
                        continue  # outlier: report it, do not add it

                sums[num][month] += value
                counts[num][month] += 1

    return [
        [total / count if count else np.nan for total, count in zip(sum_row, count_row)]
        for sum_row, count_row in zip(sums, counts)
    ]
150
+
151
+
152
def year_value(path, file, reservoir_name, year_list, compute, string):
    """Return the total or the mean value of a reservoir for every year.

    For every entry of ``year_list`` the file
    ``path + file + str(year) + ".csv"`` is read. Values equal to 0 or to
    -999.9 are treated as missing before the data is summed or averaged.

    Parameters:
        path (str): folder prefix of the data files.
        file (str): file name without the year part
            (ex: "percentage-" or "In-Daily-").
        reservoir_name (str): column (reservoir) to evaluate.
        year_list (list[int]): years to read.
        compute (str): "sum" for the yearly total; any other value gives
            the yearly mean.
        string (str): data type in the files; "percentage" converts
            "XX.xx%" text through percent_to_num first.

    Returns:
        list: one result per year, in the order of ``year_list``.
    """
    results = []
    for yr in year_list:
        water = pd.read_csv(path + file + str(yr) + ".csv")

        selected = water[reservoir_name]
        # 0 and -999.9 might be wrong data, keep them out of the statistics.
        selected = selected.mask((selected == 0) | (selected == -999.9))

        if string == "percentage":
            # Convert through a list and go back to a Series.
            selected = pd.Series(percent_to_num(list(selected)))

        if compute == "sum":
            results.append(selected.sum(0))  # along the rows
        else:
            results.append(selected.mean(0))

    return results
183
+
184
def add_row_column(num, upper_bound):
    """Return the position after ``num``, wrapping to 1 at ``upper_bound``.

    Parameters:
        num (int): current row / column number.
        upper_bound (int): number at which counting starts over.

    Returns:
        int: 1 when ``num`` equals ``upper_bound``, otherwise ``num + 1``.
    """
    return 1 if num == upper_bound else num + 1
189
+
190
def period_list(days, year):
    """Return the end date of every ``days``-day period of a year.

    The labels are meant as x axis reference points for plots: counting
    from January 1st, every ``days``-th day is written as "month/day"
    without zero padding. When the last period is shorter than ``days``,
    "12/31" closes the list.

    Parameters:
        days (int): length of one period in days; must be at least 1.
        year (int): year used to decide whether February has 29 days.

    Returns:
        list[str]: period end dates, ex: ["1/7", "1/14", ..., "12/31"].
        "12/31" appears exactly once, also when a period ends on that day.

    Raises:
        ValueError: if ``days`` is smaller than 1 (this used to loop forever).
    """
    import calendar  # local import: only needed for the leap-year rule

    if days < 1:
        raise ValueError("days must be a positive integer.")

    february = 29 if calendar.isleap(year) else 28
    month_lengths = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

    # Every date of the year in calendar order, so that the n-th day of the
    # year sits at index n - 1.
    year_dates = [
        str(month) + "/" + str(day)
        for month, length in enumerate(month_lengths, start=1)
        for day in range(1, length + 1)
    ]

    period = year_dates[days - 1::days]

    # Remaining days: close the year unless a period already ends on 12/31.
    if not period or period[-1] != year_dates[-1]:
        period.append("12/31")

    return period
224
+
225
def percent_to_num(data):
    """Convert the text entries of ``data`` such as "12.5%" to floats.

    Every entry that is a string has its "%" characters removed and is
    converted with float(); all other entries (for example NaN, which is
    already a float) are left untouched.

    Parameters:
        data (list[str]): values that may carry a "%"; any container that
            supports len() and integer item access and assignment works.

    Returns:
        The same ``data`` object, modified in place.

    Raises:
        ValueError: if a string entry is not a number once "%" is removed.
    """
    # Access by position instead of iterating so the container is updated
    # in place and handed back as the very same object.
    for position in range(len(data)):
        item = data[position]
        if isinstance(item, str):  # also matches str subclasses (numpy.str_)
            data[position] = float(item.replace("%", ""))
    return data
0 commit comments