Skip to content

Commit d895961

Browse files
committed
Initial commit for the algorithm and data
1 parent 4055b2b commit d895961

File tree

5 files changed

+723
-0
lines changed

5 files changed

+723
-0
lines changed

data/iris.csv

+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
5.1,3.5,1.4,0.2,Iris-setosa
2+
4.9,3.0,1.4,0.2,Iris-setosa
3+
4.7,3.2,1.3,0.2,Iris-setosa
4+
4.6,3.1,1.5,0.2,Iris-setosa
5+
5.0,3.6,1.4,0.2,Iris-setosa
6+
5.4,3.9,1.7,0.4,Iris-setosa
7+
4.6,3.4,1.4,0.3,Iris-setosa
8+
5.0,3.4,1.5,0.2,Iris-setosa
9+
4.4,2.9,1.4,0.2,Iris-setosa
10+
4.9,3.1,1.5,0.1,Iris-setosa
11+
5.4,3.7,1.5,0.2,Iris-setosa
12+
4.8,3.4,1.6,0.2,Iris-setosa
13+
4.8,3.0,1.4,0.1,Iris-setosa
14+
4.3,3.0,1.1,0.1,Iris-setosa
15+
5.8,4.0,1.2,0.2,Iris-setosa
16+
5.7,4.4,1.5,0.4,Iris-setosa
17+
5.4,3.9,1.3,0.4,Iris-setosa
18+
5.1,3.5,1.4,0.3,Iris-setosa
19+
5.7,3.8,1.7,0.3,Iris-setosa
20+
5.1,3.8,1.5,0.3,Iris-setosa
21+
5.4,3.4,1.7,0.2,Iris-setosa
22+
5.1,3.7,1.5,0.4,Iris-setosa
23+
4.6,3.6,1.0,0.2,Iris-setosa
24+
5.1,3.3,1.7,0.5,Iris-setosa
25+
4.8,3.4,1.9,0.2,Iris-setosa
26+
5.0,3.0,1.6,0.2,Iris-setosa
27+
5.0,3.4,1.6,0.4,Iris-setosa
28+
5.2,3.5,1.5,0.2,Iris-setosa
29+
5.2,3.4,1.4,0.2,Iris-setosa
30+
4.7,3.2,1.6,0.2,Iris-setosa
31+
4.8,3.1,1.6,0.2,Iris-setosa
32+
5.4,3.4,1.5,0.4,Iris-setosa
33+
5.2,4.1,1.5,0.1,Iris-setosa
34+
5.5,4.2,1.4,0.2,Iris-setosa
35+
4.9,3.1,1.5,0.1,Iris-setosa
36+
5.0,3.2,1.2,0.2,Iris-setosa
37+
5.5,3.5,1.3,0.2,Iris-setosa
38+
4.9,3.1,1.5,0.1,Iris-setosa
39+
4.4,3.0,1.3,0.2,Iris-setosa
40+
5.1,3.4,1.5,0.2,Iris-setosa
41+
5.0,3.5,1.3,0.3,Iris-setosa
42+
4.5,2.3,1.3,0.3,Iris-setosa
43+
4.4,3.2,1.3,0.2,Iris-setosa
44+
5.0,3.5,1.6,0.6,Iris-setosa
45+
5.1,3.8,1.9,0.4,Iris-setosa
46+
4.8,3.0,1.4,0.3,Iris-setosa
47+
5.1,3.8,1.6,0.2,Iris-setosa
48+
4.6,3.2,1.4,0.2,Iris-setosa
49+
5.3,3.7,1.5,0.2,Iris-setosa
50+
5.0,3.3,1.4,0.2,Iris-setosa
51+
7.0,3.2,4.7,1.4,Iris-versicolor
52+
6.4,3.2,4.5,1.5,Iris-versicolor
53+
6.9,3.1,4.9,1.5,Iris-versicolor
54+
5.5,2.3,4.0,1.3,Iris-versicolor
55+
6.5,2.8,4.6,1.5,Iris-versicolor
56+
5.7,2.8,4.5,1.3,Iris-versicolor
57+
6.3,3.3,4.7,1.6,Iris-versicolor
58+
4.9,2.4,3.3,1.0,Iris-versicolor
59+
6.6,2.9,4.6,1.3,Iris-versicolor
60+
5.2,2.7,3.9,1.4,Iris-versicolor
61+
5.0,2.0,3.5,1.0,Iris-versicolor
62+
5.9,3.0,4.2,1.5,Iris-versicolor
63+
6.0,2.2,4.0,1.0,Iris-versicolor
64+
6.1,2.9,4.7,1.4,Iris-versicolor
65+
5.6,2.9,3.6,1.3,Iris-versicolor
66+
6.7,3.1,4.4,1.4,Iris-versicolor
67+
5.6,3.0,4.5,1.5,Iris-versicolor
68+
5.8,2.7,4.1,1.0,Iris-versicolor
69+
6.2,2.2,4.5,1.5,Iris-versicolor
70+
5.6,2.5,3.9,1.1,Iris-versicolor
71+
5.9,3.2,4.8,1.8,Iris-versicolor
72+
6.1,2.8,4.0,1.3,Iris-versicolor
73+
6.3,2.5,4.9,1.5,Iris-versicolor
74+
6.1,2.8,4.7,1.2,Iris-versicolor
75+
6.4,2.9,4.3,1.3,Iris-versicolor
76+
6.6,3.0,4.4,1.4,Iris-versicolor
77+
6.8,2.8,4.8,1.4,Iris-versicolor
78+
6.7,3.0,5.0,1.7,Iris-versicolor
79+
6.0,2.9,4.5,1.5,Iris-versicolor
80+
5.7,2.6,3.5,1.0,Iris-versicolor
81+
5.5,2.4,3.8,1.1,Iris-versicolor
82+
5.5,2.4,3.7,1.0,Iris-versicolor
83+
5.8,2.7,3.9,1.2,Iris-versicolor
84+
6.0,2.7,5.1,1.6,Iris-versicolor
85+
5.4,3.0,4.5,1.5,Iris-versicolor
86+
6.0,3.4,4.5,1.6,Iris-versicolor
87+
6.7,3.1,4.7,1.5,Iris-versicolor
88+
6.3,2.3,4.4,1.3,Iris-versicolor
89+
5.6,3.0,4.1,1.3,Iris-versicolor
90+
5.5,2.5,4.0,1.3,Iris-versicolor
91+
5.5,2.6,4.4,1.2,Iris-versicolor
92+
6.1,3.0,4.6,1.4,Iris-versicolor
93+
5.8,2.6,4.0,1.2,Iris-versicolor
94+
5.0,2.3,3.3,1.0,Iris-versicolor
95+
5.6,2.7,4.2,1.3,Iris-versicolor
96+
5.7,3.0,4.2,1.2,Iris-versicolor
97+
5.7,2.9,4.2,1.3,Iris-versicolor
98+
6.2,2.9,4.3,1.3,Iris-versicolor
99+
5.1,2.5,3.0,1.1,Iris-versicolor
100+
5.7,2.8,4.1,1.3,Iris-versicolor
101+
6.3,3.3,6.0,2.5,Iris-virginica
102+
5.8,2.7,5.1,1.9,Iris-virginica
103+
7.1,3.0,5.9,2.1,Iris-virginica
104+
6.3,2.9,5.6,1.8,Iris-virginica
105+
6.5,3.0,5.8,2.2,Iris-virginica
106+
7.6,3.0,6.6,2.1,Iris-virginica
107+
4.9,2.5,4.5,1.7,Iris-virginica
108+
7.3,2.9,6.3,1.8,Iris-virginica
109+
6.7,2.5,5.8,1.8,Iris-virginica
110+
7.2,3.6,6.1,2.5,Iris-virginica
111+
6.5,3.2,5.1,2.0,Iris-virginica
112+
6.4,2.7,5.3,1.9,Iris-virginica
113+
6.8,3.0,5.5,2.1,Iris-virginica
114+
5.7,2.5,5.0,2.0,Iris-virginica
115+
5.8,2.8,5.1,2.4,Iris-virginica
116+
6.4,3.2,5.3,2.3,Iris-virginica
117+
6.5,3.0,5.5,1.8,Iris-virginica
118+
7.7,3.8,6.7,2.2,Iris-virginica
119+
7.7,2.6,6.9,2.3,Iris-virginica
120+
6.0,2.2,5.0,1.5,Iris-virginica
121+
6.9,3.2,5.7,2.3,Iris-virginica
122+
5.6,2.8,4.9,2.0,Iris-virginica
123+
7.7,2.8,6.7,2.0,Iris-virginica
124+
6.3,2.7,4.9,1.8,Iris-virginica
125+
6.7,3.3,5.7,2.1,Iris-virginica
126+
7.2,3.2,6.0,1.8,Iris-virginica
127+
6.2,2.8,4.8,1.8,Iris-virginica
128+
6.1,3.0,4.9,1.8,Iris-virginica
129+
6.4,2.8,5.6,2.1,Iris-virginica
130+
7.2,3.0,5.8,1.6,Iris-virginica
131+
7.4,2.8,6.1,1.9,Iris-virginica
132+
7.9,3.8,6.4,2.0,Iris-virginica
133+
6.4,2.8,5.6,2.2,Iris-virginica
134+
6.3,2.8,5.1,1.5,Iris-virginica
135+
6.1,2.6,5.6,1.4,Iris-virginica
136+
7.7,3.0,6.1,2.3,Iris-virginica
137+
6.3,3.4,5.6,2.4,Iris-virginica
138+
6.4,3.1,5.5,1.8,Iris-virginica
139+
6.0,3.0,4.8,1.8,Iris-virginica
140+
6.9,3.1,5.4,2.1,Iris-virginica
141+
6.7,3.1,5.6,2.4,Iris-virginica
142+
6.9,3.1,5.1,2.3,Iris-virginica
143+
5.8,2.7,5.1,1.9,Iris-virginica
144+
6.8,3.2,5.9,2.3,Iris-virginica
145+
6.7,3.3,5.7,2.5,Iris-virginica
146+
6.7,3.0,5.2,2.3,Iris-virginica
147+
6.3,2.5,5.0,1.9,Iris-virginica
148+
6.5,3.0,5.2,2.0,Iris-virginica
149+
6.2,3.4,5.4,2.3,Iris-virginica
150+
5.9,3.0,5.1,1.8,Iris-virginica
+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package kmeans
2+
3+
/*
4+
This module provides common distance functions for measuring distance
5+
between observations.
6+
7+
Minkowski Distance is one the most inclusive among one as other distances are only
8+
a specific case of Minkowski Distance(Chebyshev Distance is not straightforward, though).
9+
10+
when p=1 in MinkowskiDistance, it becomes ManhattanDistance,
11+
when p=2 in MinkowskiDistance, it becomes EuclideanDIstance,
12+
when p goes infinity, it becomes ChebyshevDistance.
13+
14+
Since the ManhattanDistance and EuclideanDistance are very frequently used, they are
15+
implemented separately.
16+
*/
17+
18+
import (
19+
"math"
20+
)
21+
22+
// Lp Norm of an array, given p >= 1
23+
func LPNorm(vector []float64, p float64) (float64, error) {
24+
distance := 0.
25+
for _, jj := range vector {
26+
distance += math.Pow(math.Abs(jj), p)
27+
}
28+
return math.Pow(distance, 1/p), nil
29+
}
30+
31+
// 1-norm distance (l_1 distance)
32+
func ManhattanDistance(firstVector, secondVector []float64) (float64, error) {
33+
distance := 0.
34+
for ii := range firstVector {
35+
distance += math.Abs(firstVector[ii] - secondVector[ii])
36+
}
37+
return distance, nil
38+
}
39+
40+
// 2-norm distance (l_2 distance)
41+
func EuclideanDistance(firstVector, secondVector []float64) (float64, error) {
42+
distance := 0.
43+
for ii := range firstVector {
44+
distance += (firstVector[ii] - secondVector[ii]) * (firstVector[ii] - secondVector[ii])
45+
}
46+
return math.Sqrt(distance), nil
47+
}
48+
49+
// Higher weight for the points that are far apart
50+
// Not a real metric as it does not obey triangle inequality
51+
func SquaredEuclideanDistance(firstVector, secondVector []float64) (float64, error) {
52+
distance, err := EuclideanDistance(firstVector, secondVector)
53+
return distance * distance, err
54+
}
55+
56+
// p-norm distance (l_p distance)
57+
func MinkowskiDistance(firstVector, secondVector []float64, p float64) (float64, error) {
58+
distance := 0.
59+
for ii := range firstVector {
60+
distance += math.Pow(math.Abs(firstVector[ii]-secondVector[ii]), p)
61+
}
62+
return math.Pow(distance, 1/p), nil
63+
}
64+
65+
// p-norm distance with weights (weighted l_p distance)
66+
func WeightedMinkowskiDistance(firstVector, secondVector, weightVector []float64, p float64) (float64, error) {
67+
distance := 0.
68+
for ii := range firstVector {
69+
distance += weightVector[ii] * math.Pow(math.Abs(firstVector[ii]-secondVector[ii]), p)
70+
}
71+
return math.Pow(distance, 1/p), nil
72+
}
73+
74+
// infinity norm distance (l_inf distance)
75+
func ChebyshevDistance(firstVector, secondVector []float64) (float64, error) {
76+
distance := 0.
77+
for ii := range firstVector {
78+
if math.Abs(firstVector[ii]-secondVector[ii]) >= distance {
79+
distance = math.Abs(firstVector[ii] - secondVector[ii])
80+
}
81+
}
82+
return distance, nil
83+
}
84+
85+
func HammingDistance(firstVector, secondVector []float64) (float64, error) {
86+
distance := 0.
87+
for ii := range firstVector {
88+
if firstVector[ii] != secondVector[ii] {
89+
distance++
90+
}
91+
}
92+
return distance, nil
93+
}
94+
95+
func BrayCurtisDistance(firstVector, secondVector []float64) (float64, error) {
96+
numerator, denominator := 0., 0.
97+
for ii := range firstVector {
98+
numerator += math.Abs(firstVector[ii] - secondVector[ii])
99+
denominator += math.Abs(firstVector[ii] + secondVector[ii])
100+
}
101+
return numerator / denominator, nil
102+
}
103+
104+
func CanberraDistance(firstVector, secondVector []float64) (float64, error) {
105+
distance := 0.
106+
for ii := range firstVector {
107+
distance += (math.Abs(firstVector[ii]-secondVector[ii]) / (math.Abs(firstVector[ii]) + math.Abs(secondVector[ii])))
108+
}
109+
return distance, nil
110+
}

0 commit comments

Comments
 (0)