-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathutils.py
More file actions
92 lines (74 loc) · 2.15 KB
/
utils.py
File metadata and controls
92 lines (74 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import datetime
from enum import Enum
import math
from numpy.polynomial.polynomial import Polynomial
def normalize(string: str):
    """Turn free text into a lowercase, dash-separated token.

    Leading and trailing whitespace is removed, the text is lowercased and
    every remaining space character becomes a ``-`` (consecutive spaces
    produce consecutive dashes).
    """
    trimmed = string.strip()
    lowered = trimmed.lower()
    # Splitting on a literal space and re-joining with "-" swaps each space
    # for a dash one-for-one.
    return "-".join(lowered.split(" "))
def label_from_bounds(lower, upper):
    """Build the text label for a range given its bounds.

    An open-ended range (``upper`` is ``None``) is rendered as ``">{lower}m"``;
    otherwise the label is ``"{lower}m-{upper}m"``.
    """
    # Only None marks an open-ended range; an upper bound of 0 is a real bound.
    return f">{lower}m" if upper is None else f"{lower}m-{upper}m"
def update_totals(totals, key, datum):
    """Fold ``datum`` into the running ``(sum, count)`` pair kept under ``key``.

    ``totals`` is modified in place: a key that is not present yet starts at
    ``(0, 0)``, then the sum grows by ``datum`` and the count by one.
    Nothing is returned.
    """
    # setdefault seeds the entry for a new key and hands back the stored pair.
    running_sum, count = totals.setdefault(key, (0, 0))
    totals[key] = (running_sum + datum, count + 1)
def mean(data):
    """Return the arithmetic mean of ``data``.

    Args:
        data: Any iterable of numbers. Sized containers are used as they
            are; iterators and generators are materialised once so they can
            be both counted and summed.

    Returns:
        ``sum(data) / count``.

    Raises:
        ZeroDivisionError: If ``data`` contains no values.
    """
    try:
        count = len(data)
    except TypeError:
        # Iterators and generators have no len(); consume them into a list.
        data = list(data)
        count = len(data)
    if count == 0:
        # Same exception type the bare division would raise, clearer message.
        raise ZeroDivisionError("mean() requires at least one data point")
    return sum(data) / count
def sd(data):
    """Return the population standard deviation of ``data``.

    The squared deviations from the mean are averaged over the number of
    values (divisor ``n``, not ``n - 1``) before taking the square root.

    Args:
        data: Any iterable of numbers; it is materialised once so one-shot
            iterators and generators are supported.

    Returns:
        The standard deviation as a float.

    Raises:
        ZeroDivisionError: If ``data`` contains no values.
    """
    # The values are walked twice (mean, then deviations) and counted, so
    # take a concrete copy up front.
    values = list(data)
    centre = mean(values)
    squared_deviations = sum((value - centre) ** 2 for value in values)
    return math.sqrt(squared_deviations / len(values))
def index_by_month(dates):
    """Map each date to a month number counted from the earliest year.

    January of the earliest year present in ``dates`` is month 1, and the
    count keeps increasing across later years (January of the following
    year is 13, and so on).

    Args:
        dates: Iterable of objects exposing ``year`` and ``month``
            attributes. It is materialised once, so generators are accepted.

    Returns:
        A list of month indices in the same order as ``dates``; an empty
        list when ``dates`` is empty.
    """
    dates = list(dates)
    if not dates:
        # min() below would raise on an empty sequence; nothing to index.
        return []
    start_year = min(entry.year for entry in dates)
    return [entry.month + 12 * (entry.year - start_year) for entry in dates]
def date_from_float(num):
    """Convert a fractional year such as ``2001.5`` into a calendar date.

    The integer part of ``num`` is the year and the fractional part selects
    a day on a fixed 365-day calendar. Leap years are ignored for
    simplicity, so 29 February is never produced.

    Args:
        num: Fractional year, e.g. ``2001.0`` for 1 January 2001.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If the computed day falls outside the 365-day calendar
            or ``datetime.date`` rejects the resulting year/day.
    """
    year = math.trunc(num)
    # Day of the year, 1-365 rather than 0-364.
    day = math.trunc((num - year) * 365) + 1
    month_lengths = (
        31,  # January
        28,  # February
        31,  # March
        30,  # April
        31,  # May
        30,  # June
        31,  # July
        31,  # August
        30,  # September
        31,  # October
        30,  # November
        31,  # December
    )
    for month, length in enumerate(month_lengths, start=1):
        # `<=` keeps the last day of a month inside that month; a strict `<`
        # would roll it over into day 0 of the next month.
        if day <= length:
            return datetime.date(year, month, day)
        day -= length
    raise ValueError(f"day of year out of range for {num!r}")
class Trend(Enum):
    """Detrending modes selectable in ``remove_seasonal_trend``."""

    # Leave the series as it is before centring.
    NONE = 0
    # Replace the series with differences between consecutive values.
    DIFF = 1
    # Subtract a straight line fitted against the month index.
    LINEAR = 2
def remove_seasonal_trend(x, y, trend=Trend.NONE, remove_sd=True):
    """Detrend ``y``, centre it on zero and optionally scale it by its SD.

    Args:
        x: Iterable of objects with ``year`` and ``month`` attributes, one
            per observation in ``y``. Only read when ``trend`` is
            ``Trend.LINEAR``.
        y: Iterable of numeric observations.
        trend: ``Trend.NONE`` leaves the values untouched, ``Trend.DIFF``
            replaces them with the differences between consecutive values
            (the result is then one element shorter than ``y``), and
            ``Trend.LINEAR`` subtracts a least-squares straight line fitted
            against the month index of ``x``.
        remove_sd: When true, divide the centred values by their population
            standard deviation.

    Returns:
        A list with the processed values. If ``remove_sd`` is true but the
        values have zero spread, the centred values are returned unscaled
        instead of dividing by zero.
    """
    values = list(y)
    if trend is Trend.DIFF:
        # Pair every value with its predecessor: current - previous.
        values = [current - previous for current, previous in zip(values[1:], values)]
    elif trend is Trend.LINEAR:
        months = index_by_month(x)
        fit = Polynomial.fit(months, values, 1)
        # Evaluate the fitted series directly rather than indexing
        # convert().coef, so the code does not rely on two coefficients
        # always being present after conversion.
        fitted = fit(months)
        values = [value - trend_value for value, trend_value in zip(values, fitted)]
    centre = mean(values)
    centred = [value - centre for value in values]
    if remove_sd:
        spread = sd(values)
        # A constant series has zero spread and its centred values are
        # already (effectively) zero, so skip the division.
        if spread:
            centred = [value / spread for value in centred]
    return centred