-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path7_MultipleRegression_GradientDescent.py
More file actions
96 lines (71 loc) · 2.35 KB
/
Copy path7_MultipleRegression_GradientDescent.py
File metadata and controls
96 lines (71 loc) · 2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
import pandas as pd
# =====================================
def AddOnes(df, features):
    """Build the design matrix: a leading column of ones, then the features.

    The column of ones pairs with the first (intercept) weight; the remaining
    columns pair with the other weights in the order given by ``features``.

    Args:
        df: pandas DataFrame holding the feature columns.
        features: Column label, or list of column labels, to take from ``df``.

    Returns:
        A float ``numpy.ndarray`` with ``df.shape[0]`` rows whose first column
        is all ones and whose remaining columns are the selected features.

    Raises:
        KeyError: If a requested column is not present in ``df``.
        ValueError or TypeError: If a selected column cannot be converted
            to float.
    """
    # Coerce to float up front so an object-typed column cannot silently turn
    # the whole matrix into dtype=object.
    feature_values = np.asarray(df[features], dtype=float)
    intercept = np.ones((df.shape[0], 1))
    # column_stack also accepts a 1-D array (single column label) and treats
    # it as one column.
    return np.column_stack((intercept, feature_values))
def GradientDescent(df, features, output, weights, stepsize, tolerance=0.1,
                    max_iterations=None):
    """Fit linear-regression weights by batch gradient descent on the RSS.

    Each pass computes the prediction error with the current weights, takes
    the gradient of the residual sum of squares (``2 * x.T @ error``), and
    moves every weight by ``stepsize`` times its derivative. Iteration stops
    once the gradient's Euclidean magnitude drops below ``tolerance``.

    Args:
        df: pandas DataFrame with the feature and output columns.
        features: Column label(s) used as predictors; passed to ``AddOnes``,
            which prepends the intercept column.
        output: Column label (or single-element list) of the target.
        weights: Initial weights, intercept first; one per design-matrix
            column. The caller's array is not modified.
        stepsize: Gradient-descent step size.
        tolerance: Convergence threshold on the gradient magnitude.
        max_iterations: Optional cap on the number of passes. ``None``
            (default) means iterate until convergence, as before.

    Returns:
        ``numpy.ndarray`` of shape ``(number of weights, 1)`` with the updated
        weights. If ``max_iterations`` is reached first, the weights after the
        last completed pass are returned.

    Raises:
        FloatingPointError: If the gradient becomes NaN or infinite, which
            means the descent is diverging (typically ``stepsize`` is too
            large) and would otherwise never terminate.
    """
    x = np.asarray(AddOnes(df, features), dtype=float)
    # Force a column vector: with a plain column label ``df[output]`` is 1-D,
    # and (n, 1) - (n,) would broadcast to an (n, n) matrix.
    y = np.asarray(df[output], dtype=float).reshape(-1, 1)
    # Float copy: integer initial weights cannot take in-place float updates,
    # and copying keeps the caller's array untouched.
    weights = np.array(weights, dtype=float).reshape(-1, 1)

    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        error = x @ weights - y
        # All partial derivatives of the RSS at once; the error is evaluated
        # once per pass, so this equals updating the weights one at a time.
        gradient = 2 * (x.T @ error)
        # Default norm of a 2-D array is sqrt(sum of squares).
        gradient_magnitude = np.linalg.norm(gradient)
        if not np.isfinite(gradient_magnitude):
            raise FloatingPointError(
                "gradient descent diverged (non-finite gradient); "
                "try a smaller stepsize"
            )
        # The step is applied before the convergence test, so the returned
        # weights include the final update.
        weights -= stepsize * gradient
        if gradient_magnitude < tolerance:
            break
        iteration += 1
    return weights
# -----------------------------------------------------
# Load the training and test sets from CSV.
df = pd.read_csv("kc_house_train_data.csv")
testData = pd.read_csv("kc_house_test_data.csv")

# -------- MODEL 1: price from sqft_living --------------------
features = ["sqft_living"]
output = ["price"]
weights = np.array([-47000.0, 1.0])
stepsize = 7e-12
tolerance = 2.5e7
w = GradientDescent(df, features, output, weights, stepsize, tolerance)

# Q1: fitted weights of model 1.
print(w)
print("\n")

# Q2: model-1 prediction for the first test row, next to its actual price.
x_test = AddOnes(testData, features)
y_test = testData[output].values.reshape(-1, 1)
pred_y = x_test @ w
print(pred_y[:1], y_test[0])
print("\n")

# -------- MODEL 2: price from sqft_living and sqft_living15 --
f2 = ["sqft_living", "sqft_living15"]
o2 = ["price"]
initial_weights = np.array([-100000.0, 1.0, 1.0])
stepsize = 4e-12
tolerance = 1e9
w2 = GradientDescent(df, f2, o2, initial_weights, stepsize, tolerance)

# Model-2 prediction for the first test row.
x_test2 = AddOnes(testData, f2)
pred_y2 = x_test2 @ w2
print(pred_y2[0])
print("\n")

# Q4: actual price vs. both models' predictions for the first test row.
print(y_test[0], pred_y[0], pred_y2[0])
print("\n")

# Q5: residual sum of squares of each model on the test set.
rss1 = np.square(pred_y - y_test).sum()
rss2 = np.square(pred_y2 - y_test).sum()
print(rss1, rss2)