function [Er,M,nb] = gmmbvl_kmeans(X,T,kmax,dyn,bs, killing, pl)
%GMMBVL_KMEANS  k-means clustering with optional incremental centre insertion.
%
%   [Er,M,nb] = gmmbvl_kmeans(X,T,kmax,dyn,bs,killing,pl)
%
%   Inputs
%     X       - (n x d) data matrix, one sample per row.
%     T       - test data with d columns, or [] to skip the test error.
%     kmax    - (maximal) number of centres.
%     dyn     - strategy selector:
%                 1 : start from the data mean and insert one centre at a
%                     time; candidate locations are the rows of X and the
%                     candidate is picked with a one-shot gain heuristic.
%                 2 : centres are taken from kdtree(X,...,1.5*n/kmax); no
%                     further insertion is done (dyn is reset to 0).
%                 3 : as 1, but candidate locations are the rows returned
%                     by kdtree(X,...,1.5*n/bs).
%                 4 : start from the data mean; for every insertion each
%                     row of X is tried as the new centre, the iteration
%                     is run to convergence, and the best result is kept.
%                 any other value : kmax distinct random rows of X are the
%                     initial centres and no insertion is done.
%               NOTE: the kdtree defined in this file is a stub that raises
%               an error, so dyn==2 and dyn==3 fail unless a real
%               implementation is supplied.
%     bs      - only used for dyn==3, in the kdtree argument 1.5*n/bs.
%     killing - if 1, centres that own at most d points in the last
%               iteration are removed from M before returning.
%     pl      - if true, plot the first two data dimensions and the centres
%               after every iteration (and print a message on insertion
%               for dyn 1/3).
%
%   Outputs
%     Er - column vector of summed nearest-centre distances, one entry per
%          insertion step plus one at termination. When T is non-empty a
%          second column holds the same quantity evaluated on T.
%     M  - (k x d) matrix of centres.
%     nb - number of rows returned by kdtree (dyn 2 and 3), otherwise 0.
%
%   Distances come from gmmbvl_sqdist, which is defined outside this file;
%   it is assumed to return pairwise squared distances between the columns
%   of its two arguments (TODO confirm against its implementation).

Er = []; TEr = [];

[n,d] = size(X);

THRESHOLD = 1e-4;   % relative-improvement threshold for convergence
nb = 0;

% ---- initialisation ------------------------------------------------------
% mean(X,1) is used instead of mean(X) so that a single-row X still yields
% a 1 x d centre instead of a scalar.
if dyn==1
    k = 1;
    M = mean(X,1);
    K = gmmbvl_sqdist(X',X');   % distances data <-> candidate locations
    L = X;                      % candidate insertion locations
elseif dyn==2
    k = kmax;
    M = kdtree(X,[1:n]',[],1.5*n/k);
    nb = size(M,1);
    dyn = 0;                    % behave like plain k-means from here on
elseif dyn==3
    L = kdtree(X,[1:n]',[],1.5*n/bs);
    nb = size(L,1);
    k = 1;
    M = mean(X,1);
    K = gmmbvl_sqdist(X',L');
elseif dyn==4
    k = 1;
    M = mean(X,1);
    K = gmmbvl_sqdist(X',X');
    L = X;
else
    k = kmax;
    tmp = randperm(n);
    M = X(tmp(1:k),:);
end

Wold = realmax;

% ---- main loop -----------------------------------------------------------
while k <= kmax
    kill = [];

    % distance of every sample to every centre, and the winning centre
    Dist = gmmbvl_sqdist(M',X');
    [Dwin,Iwin] = min(Dist',[],2);

    % total error for the current centres
    Wnew = sum(Dwin);

    % move each centre to the mean of its members; centres owning at most
    % d points are left untouched and optionally marked for removal
    for c = 1:size(M,1)
        members = find(Iwin==c);
        if size(members,1) > d
            M(c,:) = mean(X(members,:),1);
        elseif killing==1
            kill = [kill; c];
        end
    end

    % converged for the current k?  The threshold is 10x looser while
    % centres are still going to be added (k < kmax).
    if 1-Wnew/Wold < THRESHOLD*(10-9*(k==kmax))
        if dyn && k < kmax

            if dyn == 4
                % Try every data point as the new centre and keep the best
                % converged solution.  best_Er starts at Inf (not Wnew) so
                % that best_M is always assigned, even when no candidate
                % strictly improves on the current error.
                best_Er = Inf;

                for cand = 1:n
                    Wprev = Inf;
                    Wtmp  = Wnew;
                    Mtmp  = [M; X(cand,:)];
                    while (1-Wtmp/Wprev) > THRESHOLD*10
                        Wprev = Wtmp;
                        Dtmp = gmmbvl_sqdist(Mtmp',X');
                        [Dmin,Imin] = min(Dtmp',[],2);
                        Wtmp = sum(Dmin);
                        for c = 1:size(Mtmp,1)
                            members = find(Imin==c);
                            if size(members,1) > d
                                Mtmp(c,:) = mean(X(members,:),1);
                            end
                        end
                    end
                    if Wtmp < best_Er
                        best_M  = Mtmp;
                        best_Er = Wtmp;
                    end
                end

                M    = best_M;
                Wnew = best_Er;
                if ~isempty(T)
                    tmp = gmmbvl_sqdist(T',M');
                    TEr = [TEr; sum(min(tmp,[],2))];
                end
                Er = [Er; Wnew];
                k = k+1;

            else
                % Pick the candidate location with the largest summed
                % reduction max(Dwin - K(:,j), 0) over all samples.
                gain = sum(max(repmat(Dwin,1,size(K,2))-K,0),1);
                [tmp,new] = max(gain);
                k = k+1;
                M = [M; L(new,:)+eps];
                if pl
                    fprintf( 'new cluster, k=%d\n', k);
                end
                % Dist is not recomputed here, so the recorded error is
                % the converged error before the insertion.
                [Dwin,Iwin] = min(Dist',[],2);
                Wnew = sum(Dwin);
                Er = [Er; Wnew];
                if ~isempty(T)
                    tmp = gmmbvl_sqdist(T',M');
                    TEr = [TEr; sum(min(tmp,[],2))];
                end
            end
        else
            % no more centres to add: force loop exit
            k = kmax+1;
        end
    end
    Wold = Wnew;
    if pl
        figure(1); plot(X(:,1),X(:,2),'g.',M(:,1),M(:,2),'k.',M(:,1),M(:,2),'k+');
        drawnow;
    end
end

% ---- final bookkeeping ---------------------------------------------------
Er = [Er; Wnew];
if ~isempty(T)
    tmp = gmmbvl_sqdist(T',M');
    TEr = [TEr; sum(min(tmp,[],2))];
    Er  = [Er TEr];
end
M(kill,:) = [];   % drop centres flagged in the last iteration (killing==1)
0139
0140
0141
function varargout = kdtree(varargin)
% KDTREE  Placeholder for a kd-tree routine that is not shipped in this file.
%   Accepts any arguments and always raises an error; callers that reach
%   this function therefore cannot continue.
msg = 'gmmbvl_kmeans:kdtree was called, but there is no implementation. This is an internal error.';
error(msg);