fork download
  1. # Single linkage hierarchical Algorithm
  2.  
  3. %Cluster reads data from a file called data.dat. It will arbitrarily
  4. %choose one point to be a hub and cluster all the points around this hub.
  5. %It then finds the point farthest away from the hub and makes this point a
  6. %new hub. Next it clusters the data around the hub it is nearest. This
  7. %process is repeated until the distance from every point to its hub is
  8. %less than half the average distance between all pairs of hubs.
  9.  
  % Driver script: loads the points, seeds the first hub, then keeps promoting
  % the point farthest from its hub to a new hub until farout reports that the
  % stop condition is met.

  load data.dat;                       % creates the matrix "data" (one point per row)
  graph_data;                          % plots original points
  [clusters,dist,hubs] = setup(data);  % clusters all points around first point

  n_points = size(data,1);
  counter = 1;      % present number of hubs
  % The loop flag used to be called "continue", which is a reserved keyword in
  % MATLAB/Octave and cannot be assigned to; it is named keep_going instead.
  keep_going = 1;   % 1=true 0=false: whether to continue forming new hubs

  % The counter bound stops the loop once every point is already a hub; without
  % it, data whose points all coincide never satisfies the stop condition and
  % hubs grows without limit.
  while keep_going && counter < n_points
    counter = counter + 1;             % adding new hub
    [m,i] = max(dist);                 % m = largest squared distance to a hub,
                                       % i = index of the point that attains it
    hubs(counter) = i;                 % that point becomes the new hub

    % NOTE(review): recluster is not defined in this file; presumably it lives
    % in recluster.m and reassigns each point to its nearest hub -- confirm.
    [dist,clusters] = recluster(data,dist,clusters,i);
    pause                              % wait for a key press before redrawing
    redraw                             % draws new clusters
    maxdist = max(dist);               % squared distance of the point farthest from its hub
    keep_going = farout(counter,hubs,data,maxdist);  % checks the stop condition
  end
  33.  
  34. %Set up function.
  35.  
  function [clusters,dist,hubs]=setup(data)
  %SETUP [clusters,dist,hubs]=setup(data)
  %   Initial state of the clustering, with point 1 acting as the only hub.
  %   clusters : n-by-1 column of ones (every point belongs to hub point 1)
  %   hubs     : n-by-1 column, 1 in the first position and 0 elsewhere
  %   dist     : squared distance from every point to point 1, as computed
  %              by distance(data,clusters)

    npts = size(data,1);

    % First hub slot holds point index 1; the remaining slots are still unused.
    hubs = [1; zeros(npts-1,1)];

    % Every point starts out assigned to the hub at point 1.
    clusters = ones(npts,1);
    dist = distance(data,clusters);

  end
  53.  
  54. %distance function.
  55.  
  function dist=distance(d,c)
  %DISTANCE Squared distance from hub   dist=distance(d,c)
  %   d    : n-by-k matrix, one point per row
  %   c    : cluster array; c(i)=j means point i belongs to the cluster whose
  %          hub is point j (only the first n entries are used)
  %   dist : n-by-1 column; dist(i) is the SQUARE of the Euclidean distance
  %          between point i and its hub d(c(i),:)

    n = size(d,1);

    % Row i of dif is hub(i) - point(i); indexing with the whole cluster array
    % replaces the element-by-element loop and also works for n = 0.
    dif = d(c(1:n),:) - d;

    % Sum the squares along each row. The dimension is given explicitly so that
    % single-column data is not collapsed into one scalar (sum of a row vector
    % adds up the entire vector).
    dist = sum(dif.*dif, 2);

  end
  75.  
  76. %Far out function
  77.  
  function keep_going = farout(counter,hubs,data,maxdist)
  %FAROUT keep_going = farout(counter,hubs,data,maxdist)
  %   Evaluates the stop condition of the clustering loop.
  %   counter : number of hubs currently in use
  %   hubs    : hubs(1:counter) are the point indices of the hubs
  %   data    : matrix of points, one per row
  %   maxdist : largest SQUARED distance from any point to its hub
  %   Returns 0 (stop) when the point farthest from its hub is closer to it
  %   than half the average distance between all pairs of hubs, 1 otherwise.
  %   The output is not named "continue" because that is a reserved keyword.

    npairs = counter*(counter-1)/2;

    if npairs < 1
      % Fewer than two hubs: there is no hub pair to average over, so keep
      % going for as long as some point is away from its hub.
      keep_going = double(maxdist > 0);
      return
    end

    % One row per unordered hub pair (i<j), holding their coordinate difference.
    dif = zeros(npairs, size(data,2));
    index = 0;
    for i = 1:(counter-1)
      for j = i+1 : counter
        index = index + 1;
        dif(index,:) = data(hubs(i),:) - data(hubs(j),:);
      end
    end

    % Euclidean length of every pair difference; summing along dimension 2
    % keeps one value per pair even when the data has a single column.
    pair_dist = sqrt(sum(dif.*dif, 2));
    half_average = sum(pair_dist)/(2*index);

    % maxdist is a squared distance, hence the square root before comparing.
    if sqrt(maxdist) < half_average
      keep_going = 0;
    else
      keep_going = 1;
    end
  end % end for farout
  101.  
  % graph_data: scatter plot of the raw points (columns 1 and 2 of "data")
  % inside a window that leaves a margin of one unit on every side.

  x = data(:,1);
  y = data(:,2);
  plot(x,y,'*')

  % Bounding box of the points, used for the axis limits.
  minx = min(x);
  miny = min(y);
  maxx = max(x);
  maxy = max(y);
  axis([minx-1, maxx+1, miny - 1, maxy + 1]);
  112.  
  113.  
  % redraw: plots every point in the colour of its cluster. A hub is drawn as
  % '+' and an ordinary member as '*'. Clusters 1..6 use yellow, magenta, cyan,
  % red, green and blue in that order; any other cluster is drawn in white.

  n = size(data,1);

  % pointer is indexed by data point; for a hub point it holds the hub number
  % (1..counter), for every other point it stays 0.
  pointer = zeros(n, 1);
  pointer(hubs(1:counter)) = 1:counter;
  hold off;

  % Colour code the points based on the cluster number.
  palette = 'ymcrgb';
  for i = 1:n
    x = data(i,1);
    y = data(i,2);

    % Hub number of the cluster that point i belongs to.
    hub_no = pointer(clusters(i));
    if hub_no >= 1 && hub_no <= 6
      shade = palette(hub_no);
    else
      shade = 'w';
    end

    % A point that is its own hub gets the hub marker.
    if clusters(i) == i
      mark = '+';
    else
      mark = '*';
    end

    plot(x, y, [shade mark]);
    hold on;
  end

  % Sets up the axes with a one-unit margin around the data.
  x = data(:,1);
  y = data(:,2);
  minx = min(x);
  maxx = max(x);
  miny = min(y);
  maxy = max(y);
  axis([minx-1, maxx+1, miny - 1, maxy + 1]);
  192.  
Success #stdin #stdout #stderr 0.17s 65048KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
error: load: unable to find file data.dat