forked from mahat/PoissonRegression
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFrequencyDomainPoissonRegression.py
76 lines (63 loc) · 2.18 KB
/
FrequencyDomainPoissonRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
'''
Author: mahat
'''
import pandas as pd
import statsmodels.api as sm
from math import pi, sin, cos
import numpy as np
import matplotlib.pyplot as plt
# reading data
data = pd.read_csv('./data/VisitData.txt', delim_whitespace=True, header=0)
print '----- data head -----'
print data.head()
print '----- data description -----'
print data.describe()
# plotting hour vs count data
plt.plot(data['hour'], data['count'], 'o')
plt.title('Visitor counts vs hour')
plt.xlabel('Hour')
plt.ylabel('Visitor Count')
plt.show()
# in data hours are incremented in everyday
# however, time series data have generally cycles so we need to transform hour column into hour in a day
data['hourofday'] = data['hour'].apply(lambda x: ((x - 1) % 24) + 1)
print data.head()
# plotting histogram which is grouped by according to hourofday
totalVisit = np.zeros(24)
for index, row in data.iterrows():
totalVisit[row['hourofday'] - 1] = totalVisit[row['hourofday'] - 1] + row['count']
width = 1 / 1.5
plt.bar(range(1, 25), totalVisit, width, color="blue")
plt.ylabel('Mean Count of Visitors')
plt.xlabel('Hour of the day')
plt.title('Histogram')
plt.show()
# from the bar chart, it can be said that there is a cycle
# we can convert hour data into frequency domain therefore we can handle cycles
data['w'] = data['hour'].apply(lambda h: (float(h) / 24) * 2 * pi)
# conversion to frequency domain
data['fdomain'] = data['w'].apply(lambda w: sin(w) + cos(w) + sin(2*w) + cos(2*w))
# applying poisson regression
# X
feat_cols = ['fdomain']
X = [elem for elem in data[feat_cols].values]
# adding costant to adding bias
X = sm.add_constant(X, prepend=False)
# Y
Y = [elem for elem in data['count'].values]
# building the model
poisson_mod = sm.Poisson(Y, X)
poisson_res = poisson_mod.fit(method="newton")
print(poisson_res.summary())
# predicted vals
predVals = poisson_res.predict(X)
# plotting results
plt.plot(data['hourofday'], data['count'], 'bo',label= 'visitor counts')
plt.hold(True)
plt.plot(data['hourofday'], predVals, 'ro-',label= 'predicted visitor counts')
plt.legend()
plt.title('Comparesion between distribution of count and predicted values')
plt.xlabel('Hour of day')
plt.ylabel('Visitor Count')
plt.hold(False)
plt.show()