-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGaussian_PDFs_pruned.m
255 lines (199 loc) · 12.2 KB
/
Gaussian_PDFs_pruned.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
%% Symbolic calculation and numeric simulation for gaussian distriutions with pruning
clear all
close all
%% Parameters
N = 10000000; %number of pair connections
mean_value = 0.5; %mean value of the gaussian distribution of connections
standard_deviation = 1/10; %sqrt of variance. With 1/10 we are considering up to 5*sd. 3*sd is 99.7%
a = 0:0.1:0.9; %pruning values
number_points = size(a,2);
%% Plotting parameters
numericFontSize = 25;
axesFontSize = 30;
lineThickness = 2;
markLine = 1;
markSize = 12;
minv = [ 0, 0, -1, 0];
maxv = [ 1, 2, 1, 1];
n_bins = 200;
bin_dim = (maxv-minv) / n_bins;
%% Parameters and variables for symbolic calculation
syms u v w %symbolic variables
mu = [mean_value, 2*mean_value, 0, 0 ]; %values used to create the functions: single, sum, difference, absolute difference
va = [standard_deviation^2, 2*standard_deviation^2, 2*standard_deviation^2, 2*standard_deviation^2]; %values used to create the functions: single, sum, difference, absolute difference
mu_sym = zeros(1, size(mu,2)); %mean value obtained by symbolic integration
va_sym = zeros(1, size(va,2)); %variance obtained by symbolic integration
mu_bid_sym = zeros(1,2); %mean value obtained by symbolic integration
va_bid_sym = zeros(1,2); %variance obtained by symbolic integration
tol=0.99; %tolerance for the integration of the distribution
%% Printing vector
variable = char('x, y', 'x+y', 'x-y', '|x-y|'); %creates a matrix where each string is spread over a row, every character being an element of the matrix
x_axis_name = char ('w','Z_2','diff','Z_1');
y_axis_name = char ('f(w)','f(Z_2)','f(diff)','f(Z_1)');
figure_name = char ('Gaussian_initialPDF_prune0_1','Gaussian_sumPDF_prune0_1','Gaussian_diffPDF_prune0_1','Gaussian_absdiffPDF_prune0_1');
%% Sampling
for n = 1:number_points
%% Simulation samples
x_unp = normrnd(mean_value, standard_deviation, 1, N) .* (rand(1,N) > a(n));
y_unp = normrnd(mean_value, standard_deviation, 1, N) .* (rand(1,N) > a(n));
x = x_unp((x_unp+y_unp)~=0); %uses logical index to cut away the zero elements from the distributions when they have the same index in x and y (w_{ij}=w_{ji}=0)
y = y_unp((x_unp+y_unp)~=0); %uses logical index to cut away the zero elements from the distributions when they have the same index in x and y (w_{ij}=w_{ji}=0)
distr = [ x; x+y; x-y; abs(x-y)]; %vector of distributions
mean_val = [mean(x), mean(x+y), mean(x-y), mean(abs(x-y))]; %corresponding mean value
variance = [ var(x), var(x+y), var(x-y), var(abs(x-y))]; %corresponding variance
index_zero_x = find(x==0);
index_zero_y = find(y==0);
x_peak = [x(index_zero_x), x(index_zero_y)];
y_peak = [y(index_zero_x), y(index_zero_y)];
mean_peak = mean(x_peak+y_peak);
var_peak = var(x_peak+y_peak);
%% Single distributions
for i = 1:size(distr,1)
sample = minv(i):bin_dim(i):maxv(i);
%---SYMBOLIC
f1 = exp(-((u-mu(1))^2/(2*va(1)))) / sqrt(2*pi*va(1));
f2 = exp(-((u-mu(i))^2/(2*va(i)))) / sqrt(2*pi*va(i));
%Single distribution area check
area_f2 = double(int(f2,minv(i),maxv(i)));
if area_f2 > 1
display('Error, something is going wrong: the integral of a distribution cannot be larger than 1')
elseif area_f2 < tol
f2 = f2 / area_f2;
end
%overall distribution
f = (2*a(n)/(1+a(n)))*f1 + ((1-a(n))/(1+a(n)))*f2;
area = int(f,minv(i),maxv(i));
%area check, normalization and sampling
if double(area) > 1
display('Error, something is going wrong: the integral of a distribution cannot be larger than 1')
elseif double(area) < tol
f = f / double(area);
val = (((2*a(n)/(1+a(n)))*normpdf(sample,mu(1),sqrt(va(1))))+(((1-a(n))/(1+a(n)))*normpdf(sample,mu(i),sqrt(va(i)))/area_f2)) / double(area);
area = int(f,minv(i),maxv(i));
else
val = (((2*a(n)/(1+a(n)))*normpdf(sample,mu(1),sqrt(va(1))))+(((1-a(n))/(1+a(n)))*normpdf(sample,mu(i),sqrt(va(i)))/area_f2));
end
%mean and variance by integration - for the absolute difference they have to be different from the ones used to create the function
mu_sym(i) = int(f*u,minv(i),maxv(i));
va_sym(i) = int(f*(u-mu_sym(i))^2,minv(i),maxv(i));
%---SIMULATION
[counts,bin_location] = hist(distr(i,:),n_bins); %Note: bin_location==sample
bin_dim_hist = ( max(bin_location) - min(bin_location) ) / n_bins;
%print and plot
sprintf('Pruning a = %f. \nVariable: %s \nSimulation with N=%d pair connections: Mean %f. Variance %f. \nSymbolic calculation: Integral of the distribution with mean %f and variance %f: %f', a(n), variable(i,:), N, mean_val(i), variance(i), mu_sym(i), va_sym(i), double(area))
if n==1 || n==2 || n==6
figure;
bar(bin_location, counts/(sum(bin_dim_hist*counts))); %simulation --- Note: sum(bin_dim(i)*counts) is no longer equal to bin_dim*N because of the pruning
hold on
if i == 1
h = plot(sample,(1-a(n))*val); %theoretical
else
h = plot(sample,val); %theoretical
end
set(h, 'color', 'k', 'LineWidth', lineThickness);
set(gca,'fontsize',numericFontSize);
xlabel(x_axis_name(i, find(x_axis_name(i,:) ~= ' ')),'fontsize',axesFontSize);
ylabel(y_axis_name(i, find(y_axis_name(i,:) ~= ' ')),'fontsize',axesFontSize);
axis([minv(i)-0.5 maxv(i)+0.5 0 ceil(max( counts/(sum(bin_dim_hist*counts)) ))]);
colormap(gray);
caxis([-1 1.4]);
end
if (n == 2) && (i ~= 3)
print(gcf, '-depsc2', '-loose', figure_name(i, find(figure_name(i,:) ~= ' '))); % Print the figure in eps (first option) and uncropped (second object)
end
end
%% Joint distributions-bivariate gaussian
samplex = minv(2):bin_dim(2):maxv(2); %first distribution is the sum
sampley = minv(4):bin_dim(4):maxv(4); %second distribution is or the difference or the absolute difference
if n==1
%covariance and correlation from the simulation
covariance = sum((distr(2,:)-mean_val(2)).*(distr(4,:)-mean_val(4)))/N;
corr = covariance/(sqrt(variance(2))*sqrt(variance(4)));
sigma = [va(2), corr*sqrt(va(2))*sqrt(va(4)); corr*sqrt(va(2))*sqrt(va(4)), va(4)];
%---SYMBOLIC
g_un = exp( - (((v-mu(2))^2/(va(2))) + ((w-mu(4))^2/(va(4))) - 2*corr*(v-mu(2))*(w-mu(4))/(sqrt(va(2))*sqrt(va(4))) ) / (2*(1-(corr^2))) ) / ( 2*pi*sqrt (va(2)*va(4)*(1-(corr^2))) );
%g_un = exp(-((v-mu(2))^2/(2*va(2)))-((w-mu(4))^2/(2*va(4))))/(2*pi*sqrt(va(2)*va(4)));
area_bid = int((int(g_un,v,minv(2),maxv(2))),w,minv(4),maxv(4));
%area check, normalization and sampling
if double(area_bid)>1
display('Error, something is going wrong: the integral of a distribution cannot be larger than 1')
elseif double(area_bid)<tol
g_un = g_un/double(area_bid);
[X1,X2] = meshgrid(samplex,sampley);
val = mvnpdf([X1(:) X2(:)], [mu(2),mu(4)], sigma)/double(area_bid); %Note: mvnpdf returns a vector evaluating the PDF at each row of [X1(:) X2(:)]
val = reshape(val,length(samplex),length(sampley));
%area = int((int(g_un,v,minv(2),maxv(2))),w,minv(4),maxv(4));
else
[X1,X2] = meshgrid(samplex,sampley);
val = mvnpdf([X1(:) X2(:)], [mu(2),mu(4)], sigma); %Note: mvnpdf returns a vector evaluating the PDF at each row of [X1(:) X2(:)]
val = reshape(val,length(samplex),length(sampley));
end
%mean and variance by integration - for the absolute difference they have to be different from the ones used to create the function
mu_bid_sym(1,1) = int((int(g_un*v,v,minv(2),maxv(2))),w,minv(4),maxv(4));
mu_bid_sym(1,2) = int((int(g_un*w,v,minv(2),maxv(2))),w,minv(4),maxv(4));
va_bid_sym(1,1) = int((int(g_un*(v-mu_bid_sym(1,1))^2,v,minv(2),maxv(2))),w,minv(4),maxv(4));
va_bid_sym(1,2) = int((int(g_un*(w-mu_bid_sym(1,2))^2,v,minv(2),maxv(2))),w,minv(4),maxv(4));
else
g2 = 0;
g = ((1-a(n))/(1+a(n)))*g_un + (2*a(n)/(1+a(n)))*g2;
[X1,X2] = meshgrid(samplex,sampley);
val = ((1-a(n))/(1+a(n)))*mvnpdf([X1(:) X2(:)], [mu(2),mu(4)], sigma)/double(area_bid); %Note: mvnpdf returns a vector evaluating the PDF at each row of [X1(:) X2(:)]
val = reshape(val,length(samplex),length(sampley));
%area = int((int(g,v,minv(2),maxv(2))),w,minv(4),maxv(4));
end
%---SIMULATION
grid{1} = samplex;
grid{2} = sampley;
%[bidim_counts,Cell_location] = hist3([(distr(2,:))', (distr(i,:))'], grid);
[bidim_counts,Cell_location] = hist3([(distr(2,:))', (distr(i,:))'], [n_bins, n_bins]);
bin_dim_hist_1 = ( max(Cell_location{1}) - min(Cell_location{1}) ) / n_bins;
bin_dim_hist_2 = ( max(Cell_location{2}) - min(Cell_location{2}) ) / n_bins;
% %---PEAK DISTRIBUTION (without pruning)
% %simulation
% [bidim_counts_peak,Cell_location_peak] = hist3([(x_peak+y_peak)', (abs(x_peak-y_peak))'], grid);
% surf(Cell_location_peak{1},Cell_location_peak{2},(bidim_counts_peak/max(max(bidim_counts_peak)))'/(2*pi*va(2))); %(2*pi*va(2)) is the normalization from theory
% hold on
%
% %theory
% mu_peak = [0.5 0.5];
% va_peak = [(va(2)), 0; 0, (va(4))]; %covariance matrix
% X_peak = mvnrnd(mu_peak,va_peak,10000000);
% [counts_peak,Cell_peak] = hist3(X_peak, grid);
% mesh(Cell_peak{1},Cell_peak{2},(counts_peak)'/(sum(sum(bin_dim(2)*bin_dim(4)*counts_peak))));
% axis([0 1 0 1 0 10]);
%print and plot
sprintf('Pruning a = %f. \nVariables: %s, %s \nSimulation with N=%d pair connections: Mean %f, %f. Variance %f, %f. \nSymbolic calculation: Integral of the distribution with mean %f, %f and variance %f, %f: %f', a(n), variable(2,:), variable(4,:), N, mean_val(2), mean_val(4), variance(2), variance(4), mu_bid_sym(1,1), mu_bid_sym(1,2), va_bid_sym(1,1), va_bid_sym(1,2), double(area))
if n==1 || n==2 || n==6
figure;
mesh(Cell_location{1},Cell_location{2},bidim_counts'/(sum(sum(bin_dim_hist_1*bin_dim_hist_2*bidim_counts)))) %simulation
hold on
surf(samplex,sampley,val); %theoretical
set(gca,'fontsize',numericFontSize);
axis([0 2 0 1]);
%xlabel('Z_2','fontsize',axesFontSize);
%ylabel('Z_1','fontsize',axesFontSize);
%zlabel('f(Z_1,Z_2)','fontsize',axesFontSize);
view([130 14]);
colormap(gray);
caxis([0 140]);
end
if (n == 2)
print(gcf, '-depsc2', '-loose', 'Gaussian_jointPDF_prune0_1'); % Print the figure in eps (first option) and uncropped (second object)
end
end
%%%%%-----Checking
%Simulation. Verify that all the points of the distribution lie in the range [0,1]x[abs(x-y),2-abs(x-y)] --- Few points can lie outside, those that in the formers distributions x and y are outside [0,1]
k=0;
m=0;
for i=1:size(distr,2)
if and(((distr(2,i))>=(distr(4,i))),((distr(2,i))<=2-(distr(4,i))))
k=k+1;
else
m=m+1;
%display(i)
end
end
%Symbolic. Verify that the integral in the range [0,1]x[abs(x-y),2-abs(x-y)] is equal to 1
check = int((int(g_un,v,w,(2-w))),w,0,1);
%Print
sprintf('Checking the joint distribution f(x+y,|x-y|) in the definition interval [0,1]x[|x-y|,2-|x-y|]. \nSimulation. Number of points of the distribution outside: %d out of %f. \nSymbolic calculation. Area of the joint distribution: %f', m, N, double(check))