-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRBFN_Approximation.py
162 lines (127 loc) · 4.19 KB
/
RBFN_Approximation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/python
# Radial Basis Function - Approximation
# Gaussian function as activation function.
# Centroids are found using K-Means Clustring.
# Normalized spread(sigma) is calculated form these centroids and is used as common spread for all centroids.
# Lloyd's(pseudo inverse) method is used to obtain optimal output weights.
# These obtained output weights are used to approximate the testing data.
# Cross Validation is used for testing our model.
# 75% of train dataset is used for training and remaining 25% is used for testing purpose.
import numpy as np
from random import randint
from math import sqrt,exp
import matplotlib.pyplot as plt
# To get no. of inputs lying within perticular centroid(center).
def get_dims(memberships, centroid_num):
    """Return how many samples are assigned to centroid `centroid_num`.

    memberships : (m, 1) array holding one centroid index per sample.
    centroid_num : index of the centroid to count members for.
    """
    # Vectorized count replaces the original per-row Python loop;
    # int() keeps the original plain-int return type.
    return int(np.count_nonzero(memberships[:, 0] == centroid_num))
# To obtain the centroids.
def computeCentroids(x, prev_centroids, memberships, k):
    """Recompute each centroid as the mean of the samples assigned to it.

    x              : (m, n) sample matrix.
    prev_centroids : (k, n) centroids from the previous iteration.
    memberships    : (m, 1) centroid index per sample.
    k              : number of centroids.
    Returns a new (k, n) centroid matrix.  A centroid with no members keeps
    its previous position (avoids a divide-by-zero on an empty cluster).
    """
    m, n = x.shape
    centroids = np.zeros(shape=(k, n))
    labels = memberships[:, 0]
    for i in range(k):
        mask = labels == i
        if not np.any(mask):
            # Empty cluster: carry the old centroid forward.
            centroids[i, :] = prev_centroids[i, :]
        else:
            # Masked mean replaces the original (m, n) scratch matrix +
            # per-row copy loop + separate member count; same result.
            centroids[i, :] = x[mask].mean(axis=0)
    return centroids
# To obtain membership matrix which is a matrix mentioning
# which centroid is closest to the given input.
def findClosestCentroids(x, centroids):
    """Return an (m, 1) array giving the nearest centroid index per sample.

    x         : (m, n) sample matrix.
    centroids : (k, n) current centroids.
    Distance is squared Euclidean (monotone in true distance, so the argmin
    is the same and the sqrt is skipped).

    Bug fix: the original located each row's minimum with
    np.where(distances == min(distances[i, :])) over the WHOLE matrix, so an
    equal value in a *different* row could return the wrong column.  It also
    wrote `diffs[j:]` (a slice of rows j..m-1, not row j), redundantly
    rewriting the tail of the matrix every iteration.  Per-row np.argmin and
    broadcasting fix both.
    """
    m = x.shape[0]
    k = centroids.shape[0]
    distances = np.zeros(shape=(m, k))
    for i in range(k):
        diffs = x - centroids[i, :]            # broadcast over all samples
        distances[:, i] = np.sum(diffs ** 2, axis=1)
    memberships = np.zeros(shape=(m, 1))
    memberships[:, 0] = np.argmin(distances, axis=1)
    return memberships
# At first initializing the centroids randomly.
def KMeansInitCentroids(x, k):
    """Pick k distinct random rows of x as the initial centroids.

    Bug fix: the original permuted a hard-coded range of 100 indices
    (np.random.permutation(100)), which raises IndexError whenever x has
    fewer than 100 rows and silently never samples rows >= 100 otherwise.
    Permute the actual row count instead.
    """
    randidx = np.random.permutation(x.shape[0])
    return x[randidx[0:k], :]
# K-Means Clustring
def KMeans(x, initial_centroids, max_iters):
    """Run Lloyd's iterations: assign samples, recompute centroids, repeat.

    Stops early once an iteration leaves every centroid unchanged, or after
    max_iters iterations.  Returns (centroids, memberships) from the final
    iteration.
    """
    num_centers = initial_centroids.shape[0]
    centroids = initial_centroids
    for _ in range(max_iters):
        memberships = findClosestCentroids(x, centroids)
        updated = computeCentroids(x, centroids, memberships, num_centers)
        converged = (centroids == updated).all()
        centroids = updated
        if converged:
            break
    return centroids, memberships
if __name__ == '__main__':
    # Load file containing training data set: one sample per row,
    # input features in the first n-1 columns, target in the last.
    NTrain = np.loadtxt('xyz.tra', dtype=float)
    print("loadfile:shape", NTrain.shape)
    m, n = NTrain.shape
    # 75% of the rows train the network; the remaining 25% test it.
    # Fix: Python-2 `print` statements and true-division `/` broke this
    # script on Python 3 (`(NTD*3)/4` yields a float index); use print()
    # and floor division.
    NTD = (m * 3) // 4
    # inp = no. of input neurons i.e. input features/dimensions.
    inp = n - 1
    numRBFNeurons = 10
    x_train = NTrain[0:NTD, 0:inp]
    y_train = NTrain[0:NTD, inp:]
    init_centroids = KMeansInitCentroids(x_train, numRBFNeurons)
    centers, memberships = KMeans(x_train, init_centroids, 100)
    # Obtaining the normalized spread from the maximum inter-center distance.
    # NOTE(review): maxi is the max *squared* distance (no sqrt is taken
    # before dividing), and the common heuristic is sigma = d_max/sqrt(2K);
    # preserved as written -- confirm the intended formula.
    maxi = 0
    for i in range(numRBFNeurons - 1):
        for j in range(i + 1, numRBFNeurons):
            temp = np.sum((centers[i, :] - centers[j, :]) ** 2)
            if temp > maxi:
                maxi = temp
    sigma = maxi / sqrt(numRBFNeurons)
    # Design matrix: one Gaussian activation per RBF neuron, plus a
    # constant bias column of ones in the last position.
    pseudo = np.zeros(shape=(NTD, numRBFNeurons + 1))
    pseudo[:, numRBFNeurons] = 1
    for i in range(NTD):
        for j in range(numRBFNeurons):
            sqdist = np.sum((x_train[i, :] - centers[j, :]) ** 2)
            pseudo[i][j] = exp(-sqdist / (2 * (sigma ** 2)))
    # Obtaining output weights using Lloyd's(pseudo inverse) method,
    # i.e. the least-squares solution weight = pinv(H) . y.
    weight = np.linalg.pinv(pseudo).dot(y_train)
    # Testing the network on the held-out 25%.
    x_test = NTrain[NTD:, 0:inp]
    y_test = NTrain[NTD:, inp:]
    NTD = m - NTD
    pseudo = np.zeros(shape=(NTD, numRBFNeurons + 1))
    pseudo[:, numRBFNeurons] = 1
    for i in range(NTD):
        for j in range(numRBFNeurons):
            sqdist = np.sum((x_test[i, :] - centers[j, :]) ** 2)
            pseudo[i][j] = exp(-sqdist / (2 * (sigma ** 2)))
    y_predicted = pseudo.dot(weight)
    # Report root-mean-squared error over the test set.
    sumerr = np.sum((y_test - y_predicted) ** 2)
    print(sqrt(sumerr / NTD))