# Single linkage hirerarichal Algorithm
%Cluster reads data from a file called data.dat. It will arbitrarily
%choose one point to be a hub and cluster all the points around this hub.
%It then finds the point farthest away from the hub and makes this point a
%new hub. Next it clusters the data around the hub it is nearest. This
%process is repeated until the distance from every point to its hub is
%less than half the average distance between all pairs of hubs.
load data.dat;
graph_data; %Plots original points
[clusters,dist,hubs] = setup(data); %Clusters all points around first point
counter =1; %Keeps track of number of the present number of hubs
continue = 1; % 1=true 0=false indicates whether to continue forming new
%hubs
while continue
counter = counter + 1; % adding new hub
[m,i]=max(dist); %m= maximum value in the distance array
%i is the location of the maximum value in the
% array
hubs(counter)=i; %assigns index to new hub
[dist,clusters] = recluster(data,dist,clusters,i); %Clusters points
%to nearest hub
pause
redraw %Draws new clusters
maxdist=max(dist); %returns distance of point farthest from its hub
continue = farout(counter,hubs,data,maxdist); %Checks the stop condition
end
%Set up function.
function [clusters,dist,hubs]=setup(data)
%SETUP [clusters,dist,hubs]=setup(data)
% This function assigns to the cluster array all ones, (telling that
% all points belong to Hub 1), assigns to the hub array a one in
% position 1 (telling that the first hub is point 1) and zeros in all
% other locations. The distance array contains the square of the
% distance from each point to hub one.
n=size(data,1);
clusters=ones(n,1);
hubs=zeros(n,1);
hubs(1,1)=1;
dist = distance(data,clusters);
end
%distance function.
function dist=distance(d,c)
%DIST Distance from hub dist=distance(d,c)
% This finds the square of the distance between each point and its hub
% d = data coordinates of points; c = cluster array contains index of
% hub to which point is assigned; c(i)=j point i belongs to the cluster
% whose hub is point j
n = size(d,1);
for i = 1:n
dif(i,:)= d(c(i),:) - d(i,:);
end
dist =( sum((dif.*dif)'))'; %sums the squares of the row elements
end
%Far out function
function continue = farout(counter,hubs,data,maxdist)
%FAROUT continue = farout(counter,hubs,data,maxdist)
% Calculates stop condition. Stops if the point farthest from its
% hub is within the average distance value.
index = 0;
for i = 1:(counter-1)
for j = i+1 : counter
index = index + 1;
dif(index,:)=data(hubs(i),:) - data(hubs(j),:);
end
end
dist
=sqrt((sum
((dif.
*dif
)'))');average_dist = sum(dist)/(2*index);
if sqrt(maxdist
) < average_dist
continue = 0;
else continue =1;
end
end % end for farout
%Graph_data plots the points and draws the axes
x = data(:,1);
y = data(:,2);
minx = min(x);
maxx = max(x);
miny = min(y);
maxy = max(y);
plot(x,y,'*')
axis([minx-1, maxx+1, miny - 1, maxy + 1]);
% Redraw draws the points so that each cluster is a different color.
% The hubs are represented by a + and the members are represented by a *.
n = size(data,1);
pointer = zeros(n, 1);
for i=1:counter
pointer(hubs(i))=i; % Pointer's indices are the data point indices
% Pointer's cells are the hub numbers for the points
end;
hold off;
% Color code the points based on the cluster number
for i = 1:n
x = data(i,1);
y = data(i,2);
if pointer(clusters(i)) == 1
if clusters(i) == i
plot(x, y, 'y+');
else
plot(x, y, 'y*');
end
hold on;
elseif pointer(clusters(i)) == 2
if clusters(i) == i
plot(x, y, 'm+');
else
plot(x, y, 'm*');
end
hold on;
elseif pointer(clusters(i)) == 3
if clusters(i) == i
plot(x, y, 'c+');
else
plot(x, y, 'c*');
end
hold on
elseif pointer(clusters(i)) == 4
if clusters(i) == i
plot(x, y, 'r+');
else
plot(x, y, 'r*');
end
hold on
elseif pointer(clusters(i)) == 5
if clusters(i) == i
plot(x, y, 'g+');
else
plot(x, y, 'g*');
end
hold on
elseif pointer(clusters(i)) == 6
if clusters(i) == i
plot(x, y, 'b+');
else
plot(x, y, 'b*');
end
hold on
else
if clusters(i) == i
plot(x, y, 'w+');
else
plot(x, y, 'w*');
end
hold on
end
end
% Sets up the axes
x = data(:,1);
y = data(:,2);
minx = min(x);
maxx = max(x);
miny = min(y);
maxy = max(y);
axis([minx-1, maxx+1, miny - 1, maxy + 1]);
IyBTaW5nbGUgbGlua2FnZSBoaXJlcmFyaWNoYWwgQWxnb3JpdGhtCgolQ2x1c3RlciByZWFkcyBkYXRhIGZyb20gYSBmaWxlIGNhbGxlZCBkYXRhLmRhdC4gIEl0IHdpbGwgYXJiaXRyYXJpbHkKJWNob29zZSBvbmUgcG9pbnQgdG8gYmUgYSBodWIgYW5kIGNsdXN0ZXIgYWxsIHRoZSBwb2ludHMgYXJvdW5kIHRoaXMgaHViLgolSXQgdGhlbiBmaW5kcyB0aGUgcG9pbnQgZmFydGhlc3QgYXdheSBmcm9tIHRoZSBodWIgYW5kIG1ha2VzIHRoaXMgcG9pbnQgYQolbmV3IGh1Yi4gTmV4dCBpdCBjbHVzdGVycyB0aGUgZGF0YSBhcm91bmQgdGhlIGh1YiBpdCBpcyBuZWFyZXN0LiBUaGlzCiVwcm9jZXNzIGlzIHJlcGVhdGVkICB1bnRpbCB0aGUgZGlzdGFuY2UgZnJvbSBldmVyeSBwb2ludCB0byBpdHMgaHViIGlzCiVsZXNzIHRoYW4gaGFsZiB0aGUgYXZlcmFnZSBkaXN0YW5jZSBiZXR3ZWVuIGFsbCBwYWlycyBvZiBodWJzLgoKbG9hZCBkYXRhLmRhdDsKZ3JhcGhfZGF0YTsgICAgICAgICAgICAlUGxvdHMgb3JpZ2luYWwgcG9pbnRzCltjbHVzdGVycyxkaXN0LGh1YnNdID0gc2V0dXAoZGF0YSk7ICAlQ2x1c3RlcnMgYWxsIHBvaW50cyBhcm91bmQgZmlyc3QgcG9pbnQKCmNvdW50ZXIgPTE7ICAgICAgICAgICAgICVLZWVwcyB0cmFjayBvZiBudW1iZXIgb2YgdGhlIHByZXNlbnQgbnVtYmVyIG9mIGh1YnMKY29udGludWUgPSAxOyAgICUgMT10cnVlIDA9ZmFsc2UgaW5kaWNhdGVzIHdoZXRoZXIgdG8gY29udGludWUgZm9ybWluZyBuZXcKICAgICAgICAgICAgICAgICVodWJzCgogICAgd2hpbGUgY29udGludWUKICAgICAgICBjb3VudGVyID0gY291bnRlciArIDE7ICAgICUgYWRkaW5nIG5ldyBodWIKICAgICAgICBbbSxpXT1tYXgoZGlzdCk7ICAgICAgICAlbT0gbWF4aW11bSB2YWx1ZSBpbiB0aGUgZGlzdGFuY2UgYXJyYXkKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICVpIGlzIHRoZSBsb2NhdGlvbiBvZiB0aGUgbWF4aW11bSB2YWx1ZSBpbiB0aGUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAlIGFycmF5CiAgICAgICAgaHVicyhjb3VudGVyKT1pOyAgICAgICAgJWFzc2lnbnMgaW5kZXggdG8gbmV3IGh1YgoKICAgICAgICBbZGlzdCxjbHVzdGVyc10gPSByZWNsdXN0ZXIoZGF0YSxkaXN0LGNsdXN0ZXJzLGkpOyAgJUNsdXN0ZXJzIHBvaW50cwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICV0byBuZWFyZXN0IGh1YgogICAgICAgIHBhdXNlCiAgICAgICAgcmVkcmF3ICAgICAgICAgICAgICAgICAgJURyYXdzIG5ldyBjbHVzdGVycwogICAgICAgIG1heGRpc3Q9bWF4KGRpc3QpOyAgICAgICVyZXR1cm5zIGRpc3RhbmNlIG9mIHBvaW50IGZhcnRoZXN0IGZyb20gaXRzIGh1YgogICAgICAgIGNvbnRpbnVlID0gZmFyb3V0KGNvdW50ZXIsaHVicyxkYXRhLG1heGRpc3QpOyAlQ2hlY2tzIHRoZSBzdG9wIGNvbmRpdGlvbgoKICAgIGVuZAoKJVNldCB1cCBmdW5jdGlvbi4KICAgCiAgICBmdW5jdGlvbiBbY2x1c3RlcnMsZGlzdCxodWJzXT1zZXR1cChkYXRhKQoKJVNFVFVQICBbY2x1c3RlcnMsZGlzdCxodWJzXT1zZXR1cChkYXRhKQolICAgICAgIFRoaXMgZnVuY3Rpb24gIGFzc2lnbnMgdG8gdGhlIGNsdXN0ZXIgYXJyYXkgYWxsIG9uZXMsICh0ZWxsaW5nIHRoYXQKJSAgICAgICBhbGwgcG9pbnRzIGJlbG9uZyB0byBIdWIgMSksIGFzc2lnbnMgdG8gdGhlIGh1YiBhcnJheSBhIG9uZSBpbgolICAgICAgIHBvc2l0aW9uIDEgKHRlbGxpbmcgdGhhdCB0aGUgZmlyc3QgaHViIGlzIHBvaW50IDEpIGFuZCB6ZXJvcyBpbiBhbGwKJSAgICAgICBvdGhlciBsb2NhdGlvbnMuIFRoZSBkaXN0YW5jZSBhcnJheSBjb250YWlucyB0aGUgc3F1YXJlIG9mIHRoZQolICAgICAgIGRpc3RhbmNlIGZyb20gZWFjaCBwb2ludCB0byBodWIgb25lLgoKbj1zaXplKGRhdGEsMSk7CmNsdXN0ZXJzPW9uZXMobiwxKTsKaHVicz16ZXJvcyhuLDEpOwpodWJzKDEsMSk9MTsKCmRpc3QgPSBkaXN0YW5jZShkYXRhLGNsdXN0ZXJzKTsKCmVuZAoKJWRpc3RhbmNlIGZ1bmN0aW9uLgoKZnVuY3Rpb24gZGlzdD1kaXN0YW5jZShkLGMpCgolRElTVCAgIERpc3RhbmNlIGZyb20gaHViICBkaXN0PWRpc3RhbmNlKGQsYykKJSAgICAgICBUaGlzIGZpbmRzIHRoZSBzcXVhcmUgb2YgdGhlIGRpc3RhbmNlIGJldHdlZW4gZWFjaCBwb2ludCBhbmQgaXRzIGh1YgolICAgICAgIGQgPSBkYXRhIGNvb3JkaW5hdGVzIG9mIHBvaW50czsgYyA9IGNsdXN0ZXIgYXJyYXkgY29udGFpbnMgaW5kZXggb2YKJSAgICAgICBodWIgdG8gd2hpY2ggcG9pbnQgaXMgYXNzaWduZWQ7IGMoaSk9aiBwb2ludCBpIGJlbG9uZ3MgdG8gdGhlIGNsdXN0ZXIKJSAgICAgICB3aG9zZSBodWIgaXMgcG9pbnQgagoKbiA9IHNpemUoZCwxKTsKCiAgZm9yIGkgPSAxOm4KCiAgICAgIGRpZihpLDopPSBkKGMoaSksOikgLSBkKGksOik7CgogIGVuZAoKZGlzdCA9KCBzdW0oKGRpZi4qZGlmKScpKSc7ICAgICAlc3VtcyB0aGUgc3F1YXJlcyBvZiB0aGUgcm93IGVsZW1lbnRzCgplbmQKCiVGYXIgb3V0IGZ1bmN0aW9uCgpmdW5jdGlvbiBjb250aW51ZSA9IGZhcm91dChjb3VudGVyLGh1YnMsZGF0YSxtYXhkaXN0KQoKJUZBUk9VVCBjb250aW51ZSA9IGZhcm91dChjb3VudGVyLGh1YnMsZGF0YSxtYXhkaXN0KQolICAgICAgIENhbGN1bGF0ZXMgc3RvcCBjb25kaXRpb24uICBTdG9wcyBpZiB0aGUgcG9pbnQgZmFydGhlc3QgZnJvbSBpdHMKJSAgICAgICBodWIgaXMgd2l0aGluIHRoZSBhdmVyYWdlIGRpc3RhbmNlIHZhbHVlLgoKaW5kZXggPSAwOwoKCiAgICBmb3IgaSA9IDE6KGNvdW50ZXItMSkKICAgICAgICBmb3IgaiA9IGkrMSA6IGNvdW50ZXIKICAgICAgICAgICAgaW5kZXggPSBpbmRleCArIDE7CiAgICAgICAgICAgIGRpZihpbmRleCw6KT1kYXRhKGh1YnMoaSksOikgLSBkYXRhKGh1YnMoaiksOik7CiAgICAgICAgZW5kCiAgICBlbmQKCmRpc3Q9c3FydCgoc3VtKChkaWYuKmRpZiknKSknKTsKYXZlcmFnZV9kaXN0ID0gc3VtKGRpc3QpLygyKmluZGV4KTsKICBpZiBzcXJ0KG1heGRpc3QpIDwgYXZlcmFnZV9kaXN0CiAgICAgICAgY29udGludWUgPSAwOwogICAgICAgIGVsc2UgY29udGludWUgPTE7CiAgZW5kCmVuZCAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJSBlbmQgZm9yIGZhcm91dAoKJUdyYXBoX2RhdGEgcGxvdHMgdGhlIHBvaW50cyBhbmQgZHJhd3MgdGhlIGF4ZXMKCnggPSBkYXRhKDosMSk7CnkgPSBkYXRhKDosMik7Cm1pbnggPSBtaW4oeCk7Cm1heHggPSBtYXgoeCk7Cm1pbnkgPSBtaW4oeSk7Cm1heHkgPSBtYXgoeSk7CnBsb3QoeCx5LCcqJykKYXhpcyhbbWlueC0xLCBtYXh4KzEsIG1pbnkgLSAxLCBtYXh5ICsgMV0pOwoKCiUgUmVkcmF3IGRyYXdzIHRoZSBwb2ludHMgc28gdGhhdCBlYWNoIGNsdXN0ZXIgaXMgYSBkaWZmZXJlbnQgY29sb3IuCiUgVGhlIGh1YnMgYXJlIHJlcHJlc2VudGVkIGJ5IGEgKyBhbmQgdGhlIG1lbWJlcnMgYXJlIHJlcHJlc2VudGVkIGJ5IGEgKi4KCm4gPSBzaXplKGRhdGEsMSk7CnBvaW50ZXIgPSB6ZXJvcyhuLCAxKTsKZm9yIGk9MTpjb3VudGVyCiAgIHBvaW50ZXIoaHVicyhpKSk9aTsgICAlIFBvaW50ZXIncyBpbmRpY2VzIGFyZSB0aGUgZGF0YSBwb2ludCBpbmRpY2VzCiAgICAgICAgICAgICAgICAgICAgICAgICAlIFBvaW50ZXIncyBjZWxscyBhcmUgdGhlIGh1YiBudW1iZXJzIGZvciB0aGUgcG9pbnRzCmVuZDsKaG9sZCBvZmY7CgolIENvbG9yIGNvZGUgdGhlIHBvaW50cyBiYXNlZCBvbiB0aGUgY2x1c3RlciBudW1iZXIKCmZvciBpID0gMTpuCiAgIHggPSBkYXRhKGksMSk7CiAgIHkgPSBkYXRhKGksMik7CiAgIGlmIHBvaW50ZXIoY2x1c3RlcnMoaSkpID09IDEKICAgICAgaWYgY2x1c3RlcnMoaSkgPT0gaQogICAgICAgICBwbG90KHgsIHksICd5KycpOwogICAgICBlbHNlCiAgICAgICAgIHBsb3QoeCwgeSwgJ3kqJyk7CiAgICAgIGVuZAogICAgICBob2xkIG9uOwogICBlbHNlaWYgcG9pbnRlcihjbHVzdGVycyhpKSkgPT0gMgogICAgICBpZiBjbHVzdGVycyhpKSA9PSBpCiAgICAgICAgIHBsb3QoeCwgeSwgJ20rJyk7CiAgICAgIGVsc2UKICAgICAgICAgcGxvdCh4LCB5LCAnbSonKTsKICAgICAgZW5kCiAgICAgIGhvbGQgb247CgogICBlbHNlaWYgcG9pbnRlcihjbHVzdGVycyhpKSkgPT0gMwogICAgICBpZiBjbHVzdGVycyhpKSA9PSBpCiAgICAgICAgIHBsb3QoeCwgeSwgJ2MrJyk7CiAgICAgIGVsc2UKICAgICAgICAgcGxvdCh4LCB5LCAnYyonKTsKICAgICAgZW5kCiAgICAgIGhvbGQgb24KICAgZWxzZWlmIHBvaW50ZXIoY2x1c3RlcnMoaSkpID09IDQKICAgICAgaWYgY2x1c3RlcnMoaSkgPT0gaQogICAgICAgICBwbG90KHgsIHksICdyKycpOwogICAgIGVsc2UKICAgICAgICAgcGxvdCh4LCB5LCAncionKTsKICAgICAgZW5kCiAgICAgIGhvbGQgb24KICAgZWxzZWlmIHBvaW50ZXIoY2x1c3RlcnMoaSkpID09IDUKICAgICAgaWYgY2x1c3RlcnMoaSkgPT0gaQogICAgICAgICBwbG90KHgsIHksICdnKycpOwogICAgICBlbHNlCiAgICAgICAgIHBsb3QoeCwgeSwgJ2cqJyk7CiAgICAgIGVuZAogICAgICBob2xkIG9uCiAgIGVsc2VpZiBwb2ludGVyKGNsdXN0ZXJzKGkpKSA9PSA2CiAgICAgIGlmIGNsdXN0ZXJzKGkpID09IGkKICAgICAgICAgcGxvdCh4LCB5LCAnYisnKTsKICAgICAgZWxzZQogICAgICAgICBwbG90KHgsIHksICdiKicpOwogICAgICBlbmQKICAgICAgaG9sZCBvbgogICBlbHNlCiAgICAgIGlmIGNsdXN0ZXJzKGkpID09IGkKICAgICAgICAgcGxvdCh4LCB5LCAndysnKTsKICAgICAgZWxzZQogICAgICAgICBwbG90KHgsIHksICd3KicpOwogICAgICBlbmQKICAgICAgaG9sZCBvbgogICBlbmQKZW5kCgolIFNldHMgdXAgdGhlIGF4ZXMKCnggPSBkYXRhKDosMSk7CnkgPSBkYXRhKDosMik7Cm1pbnggPSBtaW4oeCk7Cm1heHggPSBtYXgoeCk7Cm1pbnkgPSBtaW4oeSk7Cm1heHkgPSBtYXgoeSk7CmF4aXMoW21pbngtMSwgbWF4eCsxLCBtaW55IC0gMSwgbWF4eSArIDFdKTsK