简单谈谈AGNES聚类

2022-07-29 19:42:53 浏览数 (1)

AGNES是一种采用自底向上聚合策略的层次聚类算法。思路也很简单:

1. 最开始,每一个点都是一个单独的簇。
2. 计算所有簇两两之间的距离,选择距离最短的两个簇进行合并。
3. 重复步骤 2,直到簇的个数减少到我们指定的数目。

还有一个问题:怎么计算两个簇之间的距离?本文采用平均距离(average linkage):计算分别来自两个簇的所有点对之间的距离,最后取平均值。

代码语言:python
import matplotlib.pyplot as plt
import numpy as np

# Number of clusters at which the agglomeration stops.
cluster_Num = 4
# Scatter-plot colours, one per final cluster.
color = ['red', 'black', 'blue', 'orange']
x = []  # first column of the dataset
y = []  # second column of the dataset

# Each data line holds two tab-separated numbers.  The context manager makes
# sure the file handle is closed once the data has been read.
with open('聚类数据集/dataset.txt') as data:
    for line in data:
        stripped = line.strip()
        if not stripped:
            continue  # tolerate blank lines, e.g. a trailing newline
        fields = stripped.split('\t')
        x.append(float(fields[0]))
        y.append(float(fields[1]))

# Current clusters, each a list of point indices into x / y.  Every point
# starts out as its own singleton cluster.
C = [[i] for i in range(len(x))]

def distance(Ci, Cj):
    """Return the average-linkage distance between two clusters.

    Ci and Cj are sequences of point indices into the module-level
    coordinate lists ``x`` and ``y``.  The result is the mean Euclidean
    distance over every pair (i, j) with i taken from Ci and j from Cj.
    """
    # Every pair must contribute to the mean, so equal distances are kept:
    # collapsing duplicates would bias the average whenever two pairs happen
    # to be equally far apart.
    dis = [
        np.sqrt((x[i] - x[j]) ** 2 + (y[i] - y[j]) ** 2)
        for i in Ci
        for j in Cj
    ]
    return np.mean(dis)

def find_Two_cluster():
    """Return the indices ``(i, j)``, with ``i < j``, of the closest clusters.

    Every unordered pair of clusters in the module-level list ``C`` is
    scored with ``distance``; the pair with the smallest score wins.  When
    several pairs tie, the first one in (i, j) iteration order is returned.

    Raises:
        ValueError: if ``C`` holds fewer than two clusters, so no pair exists.
    """
    count = len(C)
    if count < 2:
        raise ValueError('need at least two clusters to pick a pair to merge')

    candidates = (
        (i, j, distance(C[i], C[j]))
        for i in range(count)
        for j in range(i + 1, count)
    )
    # Only the minimum is needed, so a single pass replaces sorting the
    # whole list of pairs.
    first, second, _ = min(candidates, key=lambda pair: pair[2])
    return first, second


def agnes():
    """Run AGNES on the module-level clusters ``C`` and plot the result.

    The two closest clusters (as chosen by ``find_Two_cluster``) are merged
    repeatedly until at most ``cluster_Num`` clusters remain.  Each remaining
    cluster is then drawn as its own scatter series and the figure is shown.
    ``C`` is rebound to the final clustering.
    """
    global C
    # A pair can only be merged while at least two clusters exist, so the
    # target is never allowed to drop below one.
    target = max(cluster_Num, 1)
    while len(C) > target:
        i, j = find_Two_cluster()
        merge = C[i] + C[j]
        # Drop the two merged clusters and append their union at the end.
        C = [members for t, members in enumerate(C) if t != i and t != j]
        C.append(merge)

    for i, members in enumerate(C):
        X = [x[p] for p in members]
        Y = [y[p] for p in members]
        # Cycle through the palette so that asking for more clusters than
        # there are colours cannot raise IndexError.
        plt.scatter(X, Y, c=color[i % len(color)])

    # One legend entry per plotted cluster: C1, C2, ...
    plt.legend(['C' + str(i + 1) for i in range(len(C))])
    plt.title('agnes')
    plt.show()


# Run the clustering and show the plot only when executed as a script.
if __name__ == '__main__':
    agnes()

0 人点赞