AGNES是一种采用自底向上聚合策略的层次聚类算法。思路也很简单:
1. 最开始每一个点都是一个单独的簇。
2. 算出所有簇之间的两两距离,选择距离最短的两个簇进行合并。
3. 重复步骤2,直到簇的个数减小到我们指定的数目。

一个问题:怎么计算两个簇之间的距离?这里采用平均距离:计算分别属于两个簇的所有点对之间的距离,最后取平均值。
import matplotlib.pyplot as plt
import numpy as np
# Number of clusters at which the agglomeration stops.
cluster_Num = 4
# Palette used when plotting, one entry per final cluster.
color = ['red', 'black', 'blue', 'orange']
# Coordinates of the sample points, filled from the dataset file below.
x = []
y = []

# Each non-blank line is expected to hold two tab-separated numbers
# (x first, then y) -- TODO confirm against the actual dataset file.
# The context manager closes the file even if parsing fails.
with open('聚类数据集/dataset.txt') as data:
    for line in data:
        fields = line.strip().split('\t')
        if fields == ['']:
            continue  # skip blank lines instead of crashing on float('')
        x.append(float(fields[0]))
        y.append(float(fields[1]))

# Current clustering: each cluster is a list of point indices into x / y.
# Every point starts in its own singleton cluster.
C = [[i] for i in range(len(x))]
def distance(Ci, Cj):
    """Return the average-linkage distance between two clusters.

    The result is the mean Euclidean distance over every pair made of one
    point from ``Ci`` and one point from ``Cj``.

    Args:
        Ci: Iterable of point indices (into the module-level ``x`` / ``y``
            coordinate lists) forming the first cluster.
        Cj: Iterable of point indices forming the second cluster.

    Returns:
        The mean of all pairwise distances as a NumPy float.
    """
    # Every pair must contribute to the mean: de-duplicating the distances
    # (as an earlier version did via set()) would drop pairs that merely
    # happen to be equally far apart and bias the average.
    pairwise = [
        np.sqrt((x[i] - x[j]) ** 2 + (y[i] - y[j]) ** 2)
        for i in Ci
        for j in Cj
    ]
    return np.mean(pairwise)
def find_Two_cluster():
    """Return the indices of the two closest clusters in ``C``.

    Every unordered pair of clusters in the module-level list ``C`` is
    scored with ``distance``; the pair with the smallest score wins. On
    ties the pair encountered first (lowest ``i``, then lowest ``j``) is
    returned.

    Returns:
        A tuple ``(i, j)`` with ``i < j`` indexing into ``C``.

    Raises:
        ValueError: If ``C`` holds fewer than two clusters.
    """
    if len(C) < 2:
        raise ValueError('need at least two clusters to find a closest pair')
    pairs = ((i, j) for i in range(len(C)) for j in range(i + 1, len(C)))
    # min() keeps the first minimal item, so no full sort is needed.
    return min(pairs, key=lambda pair: distance(C[pair[0]], C[pair[1]]))
def agnes():
    """Run AGNES bottom-up clustering and plot the resulting clusters.

    Repeatedly merges the two closest clusters in the module-level list
    ``C`` (as chosen by ``find_Two_cluster``) until at most ``cluster_Num``
    clusters remain. It then draws one scatter series per cluster and shows
    the figure. ``C`` is rebound to the final clustering as a side effect.
    """
    global C
    while len(C) > cluster_Num:
        i, j = find_Two_cluster()
        merged = C[i] + C[j]
        # Drop the two source clusters, then append their union at the end.
        C = [cluster for t, cluster in enumerate(C) if t != i and t != j]
        C.append(merged)
    for k, cluster in enumerate(C):
        X = [x[p] for p in cluster]
        Y = [y[p] for p in cluster]
        # Cycle through the palette so that having more clusters than
        # colours does not raise IndexError.
        plt.scatter(X, Y, c=color[k % len(color)])
    # One legend label per plotted cluster: C1, C2, ...
    plt.legend(['C{}'.format(k + 1) for k in range(len(C))])
    plt.title('agnes')
    plt.show()
if __name__ == '__main__':
    # Cluster and plot only when executed as a script, not on import.
    agnes()