function [J, mu, c] = mykmeans(data, k)
% MYKMEANS  Hand-coded k-means clustering algorithm.
% INPUT:
%   data = m-by-n data matrix containing m data points in R^n
%   k    = number of clusters
% OUTPUT:
%   J  = m-by-1 vector containing numbers {1,2,...,k} with the following
%        meaning: data point data(i,:) belongs to cluster J(i)
%   mu = k-by-n matrix containing the k cluster centres in its rows
%   c  = k-by-1 vector containing the sizes of the clusters; i.e., c(1) is
%        the number of points in cluster 1 and so on
% If k is smaller than 1 or larger than the number of data points, a message
% is printed and all three outputs are returned empty.

m = size(data, 1);

% --- Some trivial checks ---
if k < 1
    disp('You must request at least one cluster');
    J = []; mu = []; c = [];
    return;
end
if k > m
    disp('You requested more clusters than there are data points');
    J = []; mu = []; c = [];
    return; % This is the statement that exits the function
end
% If # points = # clusters, put each point into its own cluster:
% point i is cluster i, and its centre is the point itself.
if k == m
    J = (1:k)';
    mu = data;
    c = ones(k, 1);
    return;
end

% --- Main algorithm (only reached when # points > # clusters) ---
% Randomly pick k distinct row indices out of {1,...,m} and use those data
% points as the initial cluster centres. randperm is part of base MATLAB,
% so no toolbox (randsample) or external helper is required.
perm = randperm(m);
mu = data(perm(1:k), :);
c = ones(k, 1);
J = zeros(m, 1);   % preallocated column vector; 0 means "not yet assigned"

max_iter = 200;    % upper bound on the number of k-means iterations
for iter = 1:max_iter
    J_prev = J;

    % STEP 3.1: assign each point to the closest centre
    for i = 1:m
        best = 1;                                   % start with cluster 1
        best_dist = norm(data(i,:) - mu(1,:));      % distance to centre 1
        for j = 2:k
            r = norm(data(i,:) - mu(j,:));          % distance to centre j
            if r < best_dist                        % strictly closer: ties keep the lower index
                best = j;
                best_dist = r;
            end
        end
        J(i) = best;
    end

    % If no assignment changed, mu and c (computed from the identical
    % assignment in the previous iteration) are already final.
    if isequal(J, J_prev)
        break;
    end

    % STEP 3.2: generate new means as centres of the new clusters
    for j = 1:k
        members = find(J == j);   % indices of all points belonging to cluster j
        c(j) = length(members);
        if c(j) == 0
            % Do nothing, i.e., keep the previous mu(j,:)
            disp('Empty cluster!')
        else
            % Average of all points in cluster j. Summing along dimension 1
            % explicitly also handles a cluster with a single point.
            mu(j,:) = sum(data(members,:), 1) / c(j);
        end
    end
end
% Reviews: There are no reviews yet.