说明:将10组列表分为4类
测试函数
代码语言:javascript复制#include <iostream>
#include "KMeans.h"
using namespace std;
int main()
{
double data[] = {
0.0, 0.2, 0.4,
0.3, 0.2, 0.4,
0.4, 0.2, 0.4,
0.5, 0.2, 0.4,
5.0, 5.2, 8.4,
6.0, 5.2, 7.4,
4.0, 5.2, 4.4,
10.3, 10.4, 10.5,
10.1, 10.6, 10.7,
11.3, 10.2, 10.9
};
const int size = 10; //样本数目
const int dim = 3; //三维矩阵
const int cluster_num = 4; //分成4类
KMeans* kmeans = new KMeans(dim,cluster_num);//初始化构造函数
int* labels = new int[size];
kmeans->SetInitMode(KMeans::InitUniform);
kmeans->Cluster(data,size,labels);
for(int i = 0; i < size; i)
{
printf("%f, %f, %f belongs to %d clustern", data[i*dim 0], data[i*dim 1], data[i*dim 2], labels[i]);
}
delete []labels;
delete kmeans;
return 0;
}
函数声明
代码语言:javascript复制
#pragma once
#include <fstream>
#include <string.h>
#include <assert.h>
class KMeans
{
public:
enum InitMode
{
InitRandom,
InitManual,
InitUniform,
};
KMeans(int dimNum = 1, int clusterNum = 1);
~KMeans();
void SetInitMode(int i){ m_initMode = i; }
void Init(double *data, int N);
void Cluster(double *data, int N, int *Label);
private:
int m_dimNum;
int m_clusterNum;
double** m_means;
int m_initMode;
int m_maxIterNum;
double m_endError;
double GetLabel(const double* x, int* label);
double CalcDistance(const double* x, const double* u, int dimNum);
};
具体实现
代码语言:javascript复制
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <iostream>
#include "KMeans.h"
using namespace std;
KMeans::KMeans(int dimNum, int clusterNum)
{
m_dimNum = dimNum;
m_clusterNum = clusterNum;
m_means = new double*[m_clusterNum];//1*4的数组 []
for (int i = 0; i < m_clusterNum; i )
{
m_means[i] = new double[m_dimNum];
memset(m_means[i], 0, sizeof(double) * m_dimNum);
}
m_initMode = InitRandom;
m_maxIterNum = 100;
m_endError = 0.001;
}
KMeans::~KMeans()
{
for (int i = 0; i < m_clusterNum; i )
{
delete[] m_means[i];
}
delete[] m_means;
}
void KMeans::Cluster(double *data, int N, int *Label)
{
int size = 0;
size=N;
assert(size >= m_clusterNum);
// 初始化模型
Init(data,N);
// 循环递归
double* x = new double[m_dimNum]; // 样本数据
int label = -1; // 类别索引
double iterNum = 0;
double lastCost = 0;
double currCost = 0;
int unchanged = 0;
bool loop = true;
int* counts = new int[m_clusterNum];
double** next_means = new double*[m_clusterNum]; // 再次分类新的模型
//初始化新模型
for (int i = 0; i < m_clusterNum; i )
{
next_means[i] = new double[m_dimNum];
}
//迭代次数
while (loop)
{
memset(counts, 0, sizeof(int) * m_clusterNum);
for (int i = 0; i < m_clusterNum; i )
{
memset(next_means[i], 0, sizeof(double) * m_dimNum);
}
lastCost = currCost;
currCost = 0;
// 进行初次分类
for (int i = 0; i < size; i )
{
for(int j = 0; j < m_dimNum; j )
x[j] = data[i*m_dimNum j];
currCost = GetLabel(x, &label);
counts[label] ;
for (int d = 0; d < m_dimNum; d )
{
next_means[label][d] = x[d];
}
}
currCost /= size;//平均距离,为再次分类做准备
// 重新分类
for (int i = 0; i < m_clusterNum; i )
{
if (counts[i] > 0)
{
for (int d = 0; d < m_dimNum; d )
{
next_means[i][d] /= counts[i];
}
memcpy(m_means[i], next_means[i], sizeof(double) * m_dimNum);
}
}
// 迭代终止条件
iterNum ;
if (fabs(lastCost - currCost) < m_endError * lastCost)
{
unchanged ;
}
if (iterNum >= m_maxIterNum || unchanged >= 3)
{
loop = false;
}
cout << "Iter: " << iterNum << ", Average Cost: " << currCost << endl;
}
// 跳出迭代,输出结果
for (int i = 0; i < size; i )
{
for(int j = 0; j < m_dimNum; j )
x[j] = data[i*m_dimNum j];
GetLabel(x, &label);
Label[i] = label;
}
delete[] counts;
delete[] x;
for (int i = 0; i < m_clusterNum; i )
{
delete[] next_means[i];
}
delete[] next_means;
}
//初始化模型
void KMeans::Init(double *data, int N)
{
int size = N;
if (m_initMode == InitRandom)
{
int inteval = size / m_clusterNum;//每个分类里有几组
double* sample = new double[m_dimNum];
// Seed the random-number generator with current time
srand((unsigned)time(NULL));
for (int i = 0; i < m_clusterNum; i )
{
int select = inteval * i (inteval - 1) * rand() / RAND_MAX;
for(int j = 0; j < m_dimNum; j )
sample[j] = data[select*m_dimNum j];
memcpy(m_means[i], sample, sizeof(double) * m_dimNum);
}
delete[] sample;
}
else if (m_initMode == InitUniform)
{
double* sample = new double[m_dimNum];
for (int i = 0; i < m_clusterNum; i )
{
int select = i * size / m_clusterNum;
for(int j = 0; j < m_dimNum; j )
sample[j] = data[select*m_dimNum j];
memcpy(m_means[i], sample, sizeof(double) * m_dimNum);
}
delete[] sample;
}
else if (m_initMode == InitManual)
{
// Do nothing
}
}
//返回分类标准,以距离为准
double KMeans::GetLabel(const double* sample, int* label)
{
double dist = -1;
for (int i = 0; i < m_clusterNum; i )
{
double temp = CalcDistance(sample, m_means[i], m_dimNum);
if (temp < dist || dist == -1)
{
dist = temp;
*label = i;
}
}
return dist;
}
//计算街区距离
double KMeans::CalcDistance(const double* x, const double* u, int dimNum)
{
double temp = 0;
for (int d = 0; d < dimNum; d )
{
temp = (x[d] - u[d]) * (x[d] - u[d]);
}
return sqrt(temp);
}
博主:菜鸟程序员
初衷:学习资料,程序设计,视觉算法,求职经验,工作心得