【模式识别】聚类分析

// Clustreing_Algorithm.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

#define FILENAME_LENGTH 20//size limit of the buffer that holds the input file name
#define BUFFER_LENGTH 50//size limit of the buffer used when reading one line of the file
#define CENTER_LENGTH 10//size limit of the array that stores the cluster-centre indices

// One sample (pattern) of the data set together with its clustering state.
// Every member has a default so a default-constructed Data never carries
// indeterminate values.
struct Data
{
	int type = 0;//class label stored in the input file
	float x = 0.0f;//feature 1
	float y = 0.0f;//feature 2
	int result = 0;//clustering result (index of the assigned cluster)
	int change = 0;//1 if `result` was modified in the current iteration, 0 if it was not
};

// Obsolete: helper record once used while searching for the max/min distance.
// Members are default-initialised so an instance never holds indeterminate values.
struct Find
{
	int index = 0;//index of a sample
	double distance = 0.0;//distance associated with that sample
};

vector dataSet;//存放所有的数据集
//vector findStruct;
int center[CENTER_LENGTH];//存放聚类中心下标的数组


/*从文件中读取数据*/
void readData(char *filename)
{
	int i = 0;
	int type;
	float x;
	float y;
	char buffer[BUFFER_LENGTH];
	Data datatemp;
	ifstream file(filename);
	if (!file.is_open())
	{
		cout << "Error:open the file!" << endl;
		exit(1);
	}
	while (!file.eof())
	{
		file.getline(buffer,BUFFER_LENGTH);
		sscanf_s(buffer, "%d %f %f", &type, &x, &y);
		datatemp.type = type;
		datatemp.x = x;
		datatemp.y = y;
		dataSet.push_back(datatemp);
	}
	file.close();
}

/*数据输出,返回值为数据的行数*/
int outputData()
{
	cout << endl;
	int i = 0;
	cout << setw(6) << "序号" << setw(10) <<"文件类型"<< setw(10) << "特征1"<< setw(18) << "特征2" << setw(10) << "分类" << endl;
	for (i = 0; i < dataSet.size(); i++)
	{
		cout << setw(6) << i + 1 << ":" << setw(4) << dataSet.at(i).type << setw(16) << dataSet.at(i).x << setw(16) << dataSet.at(i).y << setw(10) << dataSet.at(i).result << endl;
	}
	return i;
}

/* Euclidean distance between two samples, using their x and y features. */
double getDistance(Data a,Data b)
{
	const float dx = a.x - b.x;
	const float dy = a.y - b.y;
	return sqrt(dx * dx + dy * dy);
}

/*
 * Returns the index of the largest element among the first `size` elements
 * of `array`. When the maximum occurs more than once the first occurrence
 * wins. `size` is clamped to the real length of `array`; if nothing is left
 * to inspect the function returns -1.
 */
int getMax(const std::vector<double> &array, int size)
{
	const int count = std::min(size, static_cast<int>(array.size()));
	if (count <= 0)
	{
		return -1;
	}

	int index = 0;
	for (int i = 1; i < count; i++)
	{
		if (array[i] > array[index])
		{
			index = i;
		}
	}
	return index;
}

/*最大最小距离算法进行聚类,参数是数据的行数,返回值是K-means的K值大小*/
int maxMin(int line)
{
	int flag = 1;
	int chongfu = 0;//标志位,判断聚类中心是否重复
	double a = 0.4;//给定参数a,0 distancei0(line);//变长数组存放所有点和第一个聚类中心之间的距离
	vector distancei1(line);//变长数组存放所有点和第二个聚类中心之间的距离
	vector distanceMin(line);//临时变量
	vector> matrix;//二维矩阵存储每个模式到聚类中心的距离
	double distanceMax,min,max;//临时变量
	double diatance12;//聚类中心1和聚类中心2之间的距离
	int index = 0;//下标
	int centerP = 0;//指示当前元素下标
	//选取第一个样本为聚类中心,计算其他点和第一个点之间的距离
	for (int i = 0; i < dataSet.size(); i++)
	{
		distancei0[i] = getDistance(dataSet.at(i), dataSet.at(0));
	}
	matrix.push_back(distancei0);
	//计算距离第一个点最远的那个点
	distanceMax = distancei0[0];
	center[0] = 0;
	for (int i = 0; i < distancei0.size(); i++)
	{
		if (distancei0[i] > distanceMax)
		{
			distanceMax = distancei0[i];
			index = i;
		}
	}
	center[++centerP] = index;//第二个聚类中心
	diatance12 = getDistance(dataSet.at(0), dataSet.at(index));//计算聚类中心1和聚类中心2之间的距离
	//计算所有的点与第二个聚类中心之间的距离
	for (int i = 0; i < distancei1.size(); i++)
	{
		distancei1[i] = getDistance(dataSet.at(i), dataSet.at(center[1]));
	}
	matrix.push_back(distancei1);
	//计算全部聚类中心
	while (1)
	{
		//最小距离
		for (int j = 0; j < line; j++)
		{
			min = matrix.at(0).at(j);//初始化最小值
			for (int i = 0; i < matrix.size(); i++)
			{
				if (matrix.at(i).at(j) < min)
				{
					min = matrix.at(i).at(j);
				}
			}
			distanceMin[j] = min;
		}
		//最大距离 
		index = getMax(distanceMin, line);
		//判断是否产生新的聚类中心
		if ((distanceMin[index] > (a*diatance12)))
		{
			center[++centerP] = index;//新的聚类中心
									  //若产生新的聚类中心,则需要计算其他所有样本到新聚类中心的距离
			vector newdistance(line);
			for (int i = 0; i < line; i++)
			{
				newdistance[i] = getDistance(dataSet.at(i), dataSet.at(index));
			}
			//新产生的向量加入矩阵
			matrix.push_back(newdistance);
		}
		else break;
	}
	//输出聚类中心
	index = 0;
	cout < array, int size)
{
	double min = array[0];
	int index = 0, i = 0;
	for (i = 0; i < size; i++)
	{
		if (array[i] < min)
		{
			min = array[i];
			index = i;
		}
	}
	return index;
}

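/*Obsolete (kept commented out): counts how many samples belong to each class; parameters are the K of K-means and the size of the data set
vector<int> getCount(int k,int line)
{
	vector<int> count(k);
	for (int i = 0; i < line; i++)
	{
		for (int j = 0; j < k; j++)
		{
			if (dataSet.at(i).type == j)
			{
				count[j] += 1;
			}
		}
		
	}
	return count;
}
*/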

/*Kmeans算法,参数为K值*/
vector Kmeans(int k,int line)
{
	int index = 0;
	double x=0;
	double y=0;
	int flag = 1;
	//vector > result(k);
	vector  count(k);//计算每个类别的个数
	vector distanceToClassHeart(k);//每个模式距离K个类心的距离
	vector newCenter(k);//类心
	//newCenter初始化
	for (int i = 0; i < k; i++)
	{
		newCenter[i] = dataSet.at(center[i]);
	}
	while (flag==1)
	{
		//change初始化
		for (int i = 0; i < line; i++)
		{
			dataSet.at(i).change = 0;
		}
		//计算所有模式和新的类心之间的距离
		for (int i = 0; i < line; i++)
		{
			for (int j = 0; j < k; j++)
			{
				distanceToClassHeart[j] = getDistance(dataSet.at(i), newCenter.at(j));
			}
			index = getMin(distanceToClassHeart, k);//获取距离当前模式欧氏距离最近的类心的下标
			if (dataSet.at(i).result != index)//每次大循环的时候都先初始化所有data中change的数值为0,如果本次发生改变就修改change的数值
			{
				dataSet.at(i).change = 1;
			}
			dataSet.at(i).result = index;//分类
			//count[index] += 1;
			//result.at(index).resize(count[index]);
			//int p = count[index] - 1;
			//int p=result.at(index).size()-1;
			//result.at(index).at(p) = i;//ok
		}
		//求得新的类心
		for (int i = 0; i < k; i++)
		{
			x = 0; y = 0; count[i] = 0;
			for (int j = 0; j < line; j++)
			{
				if (dataSet.at(j).result == i)
				{
					x += dataSet.at(j).x;
					y += dataSet.at(j).y;
					count[i] += 1;
				}
			}
			newCenter[i].x = x / count[i];
			newCenter[i].y = y / count[i];
		}
		/*
		for (int i = 0; i < k; i++)
		{
			x = 0;
			y = 0;
			for (int j = 0; j < count[i]; j++)
			{
				x += dataSet.at(result[i][j]).x;//result长度在第三次迭代时变成了0???溢出。。。
				y += dataSet.at(result[i][j]).y;
			}
			Data newclassheart;
			newclassheart.x = x / count[i];
			newclassheart.y = y / count[i];
			newCenter[i] = newclassheart;
			count[i] = 0;//清零计数数组
		}*/
		//计算所有模式和新的类心之间的距离
		//vector > result(k);
		/*
		for (int i = 0; i < line; i++)
		{
			for (int j = 0; j < k; j++)
			{
				distanceToClassHeart[j] = getDistance(dataSet.at(i), newCenter.at(j));
			}
			index = getMin(distanceToClassHeart, k);//获取距离当前模式欧氏距离最近的类心的下标
			if (dataSet.at(i).result != index)//每次大循环的时候都先初始化所有data中change的数值为0,如果本次发生改变就修改change的数值
			{
				dataSet.at(i).change = 1;
			}
			dataSet.at(i).result = index;//分类
			//result[index][count[index] - 1] = i;//当前模式分类情况写入result二维数组
			count[index] += 1;
			//result.at(index).resize(count[index]);
			//int p = count[index] - 1;
			//int p = result.at(index).size() - 1;
			//result.at(index).at(p) = i;
		}*/
		bool sum = false;
		for (int i = 0; i < line; i++)
		{
			if (dataSet.at(i).change == 1)
			{
				sum = true;
				break;
			}
		}
		if (!sum)
			flag=0;
	}
	
	return newCenter;
}

void outputResult()
{
	cout << endl;
	int i = 0;
	cout << setw(6) << "序号" <<  setw(10) << "特征1" << setw(18) << "特征2" << setw(10) << "分类" << endl;
	for (i = 0; i < dataSet.size(); i++)
	{
		cout << setw(6) << i + 1 << ":" << setw(16) << dataSet.at(i).x << setw(16) << dataSet.at(i).y << setw(10) << dataSet.at(i).result << endl;
	}
}
/*
 * Marked obsolete by the author: prints how many samples each class holds
 * after K-means clustering.
 *
 * k:     number of classes, printed in the heading.
 * count: number of samples per class; only the entries that exist are
 *        printed, so a `count` shorter than k is never read out of range.
 */
void outputResult(int k, const std::vector<int> &count)
{
	const int classes = std::min(k, static_cast<int>(count.size()));
	std::cout << std::endl << "共分成" << k << "类:" << std::endl;
	for (int i = 0; i < classes; i++)
	{
		std::cout << "第" << i + 1 << "类共有:" << std::setw(6) << count[i] << "个模式" << std::endl;
	}
	std::cout << std::endl;
}

/*类间距离,使用重心距离法*/
void centerToCenter(vector array)
{
	int k = array.size();
	vector> distance(k);
	/*输出Kmeans之后的质心*/
	cout << endl;
	for (int i = 0; i < array.size(); i++)
	{
		cout << "第"< arrayCount;
	//程序提示
	char filename[FILENAME_LENGTH];
	cout << "请输入要打开的文件名,如:4k2_far.txt" << endl;
	cin >> filename;
	//读入数据
	readData(filename);
	//输出数据
	line=outputData();
	//最大最小距离算法
	k=maxMin(line);
	//K-means算法聚类
	arrayCount=Kmeans(k,line);
	//输出聚类之后的数据
	outputResult();
	//输出类间距离
	centerToCenter(arrayCount);
	system("pause");
    return 0;
}

文件

1 2.7266 3.0102
1 3.1304 2.4673
1 3.0492 2.525
1 3.226 3.1649
1 2.7223 2.5713
1 3.2862 2.8255
1 3.111 3.2994
1 3.2398 2.9681
1 2.8661 2.5533
1 3.2616 3.4902
1 1.99 3.2137
1 2.7017 2.61
1 3.0131 3.5208
1 2.8395 2.6816
1 2.9831 3.1657
1 3.7537 2.6608
1 3.0544 2.6474
1 3.3826 3.2356
1 3.2362 3.1535
1 3.0911 2.7883
1 2.4905 2.8723
1 2.8458 2.7137
1 2.7267 3.1528
1 3.1643 3.0671
1 3.3906 2.3585
1 2.1004 3.0724
1 2.8911 3.2043
1 2.6157 2.1725
1 3.1961 3.1735
1 1.7841 3.0763
1 3.4923 3.4455
1 3.4772 3.3968
1 3.3189 3.5495
1 3.2798 2.1895
1 2.2937 3.3527
1 2.8161 3.2286
1 2.3536 3.5656
1 3.3436 2.9659
1 3.0465 2.5304
1 3.9403 3.0006
1 2.9572 2.5322
1 3.1434 2.7548
1 2.9806 3.0031
1 3.6446 2.7736
1 2.8164 3.6278
1 3.4821 4.0864
1 3.6661 2.6847
1 2.3413 3.4814
1 3.2312 2.7373
1 2.6234 2.5361
1 3.2563 2.973
1 2.8906 2.0936
1 3.1462 3.7418
1 2.6438 2.7712
1 3.5794 2.373
1 2.3408 3.4678
1 2.9322 3.1776
1 3.2282 3.4508
1 2.8451 2.6851
1 3.1893 2.7909
1 2.7477 2.3049
1 2.5491 3.6024
1 3.3117 3.0164
1 3.0925 3.1162
1 2.884 2.7418
1 3.118 2.6412
1 3.4545 2.0397
1 2.4624 3.095
1 2.5876 2.6469
1 3.0391 3.5873
1 2.6821 3.3018
1 1.9979 2.7849
1 3.6046 4.4167
1 2.8052 2.3741
1 3.3704 3.5485
1 2.5016 2.8316
1 2.4297 2.3209
1 3.5564 2.5427
1 2.8552 2.5938
1 2.5227 2.2445
1 3.6131 3.0072
1 3.5096 3.6762
1 2.8118 2.9116
1 3.4572 3.1999
1 3.2817 2.5862
1 3.1585 2.6506
1 3.3324 2.3196
1 3.282 3.3363
1 2.9604 3.1444
1 2.7604 2.7121
1 2.953 2.5977
1 2.518 3.3706
1 3.1038 3.2042
1 2.6108 3.2932
1 2.7133 3.5879
1 3.022 3.2819
1 3.2887 3.6587
1 2.7811 2.5675
1 3.1395 2.8455
1 2.8075 2.9971
1 2.5235 3.3553
1 3.3622 3.1813
1 2.6712 4.1989
1 3.1562 3.9358
1 2.9157 3.2074
1 2.3513 2.7011
1 3.2596 2.7134
1 2.7007 2.8752
1 3.0785 2.813
1 3.5539 2.7665
1 3.4948 2.4448
1 3.4598 2.0789
1 2.4891 2.8472
1 3.3896 2.505
1 2.5973 2.8949
1 3.5049 3.6834
1 2.559 2.9035
1 2.1629 3.0291
1 2.5993 2.5164
1 3.1497 3.167
1 2.7022 3.2358
1 3.0475 3.2566
1 3.2959 3.3595
1 2.8036 2.3961
1 2.9221 3.2034
1 3.097 3.7377
1 2.7234 3.4401
1 2.6777 2.7403
1 2.1883 2.8882
1 3.0051 3.3939
1 2.7537 2.5876
1 2.6726 3.4831
1 3.3822 2.4474
1 2.6018 2.4145
1 3.2896 3.308
1 2.7854 2.3219
1 2.574 2.3544
1 2.7345 2.6018
1 4.2289 3.058
1 2.6282 3.4221
1 3.3936 3.1938
1 2.4638 3.8204
1 3.0994 2.4435
1 3.508 3.7114
1 2.6488 2.0721
1 2.2956 3.4115
1 1.8437 3.1186
1 2.83 2.7157
1 2.7668 3.8732
2 7.4256 8.266
2 8.703 7.4874
2 8.4307 7.7491
2 8.0501 8.6516
2 8.5896 8.4566
2 8.0519 8.3274
2 8.4173 7.5257
2 8.579 7.8808
2 8.0708 7.9862
2 8.0786 8.4343
2 8.2881 8.821
2 8.2579 7.8648
2 7.5377 7.2675
2 7.7397 7.5936
2 8.3348 7.9979
2 8.0718 8.6614
2 7.3806 8.1896
2 7.6868 7.733
2 8.2079 7.3411
2 7.7795 7.8079
2 9.2059 7.8076
2 8.5043 8.3472
2 7.4194 7.2467
2 7.6992 7.892
2 7.9446 7.872
2 7.2857 7.1511
2 8.0719 8.0038
2 8.2507 7.8614
2 8.4375 7.5687
2 7.9359 7.9991
2 8.0518 8.7005
2 8.2761 7.984
2 8.7712 7.7401
2 7.9218 7.7272
2 8.475 7.9472
2 8.3559 8.2356
2 8.6742 7.5134
2 7.6074 8.1905
2 7.8984 8.1305
2 8.0892 7.6176
2 8.4908 8.1264
2 7.2717 7.2312
2 7.6027 7.9745
2 8.6869 8.1516
2 7.3566 8.2209
2 8.0468 8.8111
2 8.2436 7.4516
2 8.7382 8.6116
2 7.7793 8.0716
2 7.9856 8.3365
2 8.8393 7.5225
2 7.8965 7.8471
2 8.5374 7.8589
2 8.7422 8.0283
2 8.3421 8.6652
2 7.8882 8.2419
2 7.9541 8.6037
2 8.3586 8.0111
2 7.853 7.6305
2 8.0515 8.1975
2 8.497 8.0638
2 8.8102 7.2967
2 8.4419 7.8188
2 8.2358 7.6146
2 8.1958 7.9179
2 8.1346 7.4925
2 7.0101 8.7125
2 8.2017 7.9962
2 8.1583 8.3481
2 8.1205 7.9192
2 7.6749 7.6971
2 7.5198 8.2414
2 7.7911 8.4015
2 7.8028 8.2066
2 8.3084 7.3386
2 7.756 7.919
2 8.0038 8.0903
2 7.8845 7.1217
2 7.7541 7.8179
2 7.3147 8.3037
2 8.0674 8.2433
2 7.8393 8.482
2 8.1533 8.1583
2 7.8958 7.3012
2 8.0175 8.4396
2 7.6834 7.5814
2 8.6523 8.0479
2 7.6882 7.7331
2 7.8279 8.0414
2 7.6931 7.7269
2 8.2605 8.4346
2 7.6377 7.6857
2 8.2584 8.5359
2 8.0147 8.0012
2 8.7019 7.5311
2 7.405 7.5773
2 7.4772 7.7605
2 8.2869 8.7212
2 8.5142 6.4754
2 7.8035 8.2711
2 8.9912 8.1837
2 8.2268 8.7324
2 7.9031 7.3747
2 8.7893 8.3092
2 8.1417 7.9259
2 8.2175 7.8444
2 8.6563 8.506
2 7.6683 7.4926
2 8.6393 7.0973
2 8.5349 7.8904
2 7.8057 8.3872
2 8.3471 7.6245
2 7.8746 7.8769
2 8.3337 7.8474
2 8.0863 8.2693
2 7.7654 7.836
2 8.3298 8.5828
2 7.8341 7.3557
2 7.0727 8.0753
2 8.6399 7.0368
2 7.9424 7.8251
2 8.0321 7.0214
2 8.4529 8.317
2 8.7172 7.8672
2 7.3876 8.1845
2 8.6239 7.6609
2 8.502 8.2946
2 7.5958 8.3769
2 8.3163 8.0605
2 7.7711 8.05
2 8.8454 8.4523
2 8.4656 8.4876
2 8.604 7.6903
2 8.2437 7.8624
2 7.6448 8.5157
2 8.3876 8.3474
2 7.9066 8.2761
2 7.7884 7.1085
2 7.5304 7.6864
2 8.518 9.0936
2 7.3841 7.5885
2 7.8675 8.1986
2 8.1031 8.0378
2 7.9853 7.8377
2 7.0016 8.2089
2 8.5393 8.3508
2 7.2779 8.4457
3 3.2709 6.1369
3 2.9987 5.9714
3 2.5602 5.9316
3 2.9639 6.0448
3 3.2984 6.1485
3 3.2752 6.3167
3 3.5049 6.3118
3 2.9292 6.2216
3 2.7027 5.855
3 2.891 6.0381
3 2.9622 6.3197
3 3.3382 6.3632
3 3.0187 5.7204
3 3.0559 6.5856
3 2.7205 5.865
3 2.6009 6.1153
3 2.7392 5.6576
3 3.1698 6.0758
3 3.3396 6.3712
3 2.9096 5.522
3 3.105 6.1714
3 2.9602 6.1118
3 3.4558 6.1802
3 2.8195 5.5693
3 3.1139 5.8101
3 3.0492 5.9645
3 3.451 5.7826
3 3.3912 6.195
3 2.8793 5.9447
3 2.9069 6.069
3 2.5756 5.6331
3 2.9775 6.2627
3 2.6917 5.5368
3 3.0577 6.0647
3 2.7511 5.8085
3 2.535 5.0883
3 2.3498 6.2257
3 3.011 6.413
3 3.1564 6.3013
3 2.7182 5.9668
3 2.7663 5.8458
3 2.4842 5.7768
3 3.4016 5.8005
3 3.5514 5.8075
3 3.356 5.8858
3 3.1057 6.3061
3 3.4432 5.9784
3 3.541 5.8704
3 3.1079 6.0222
3 2.9206 6.4314
3 2.8793 6.2424
3 3.1371 5.6366
3 3.2722 6.1326
4 6.6044 5.4906
4 6.8257 4.8702
4 7.0364 4.8652
4 7.2246 4.74
4 7.1077 5.4081
4 6.6286 5.0261
4 6.8289 4.9542
4 6.4988 4.8895
4 6.811 5.1143
4 6.5591 4.817
4 7.0664 5.1008
4 6.81 5.2355
4 7.5544 5.1802
4 7.1528 4.8791
4 6.4411 5.2578
4 6.7477 4.8343
4 7.4801 4.8656
4 6.2978 4.3821
4 6.7147 5.4213
4 6.4965 4.8323
4 7.4137 4.5293
4 7.2749 5.2806
4 7.543 5.2511
4 6.7118 5.2895
4 7.1741 5.2429
4 6.914 5.0772
4 7.5856 5.3146
4 6.7756 5.1347
4 7.4576 4.4974
4 6.7791 6.0667
4 7.3444 4.9963
4 6.8172 5.011
4 6.6711 4.8114
4 7.1353 5.3525
4 6.5235 5.2443
4 6.8068 5.5866
4 6.8967 4.8194
4 7.5556 4.6326
4 6.842 4.7881
4 6.8945 5.2523
4 7.3522 5.1422
4 7.0194 4.9704
4 7.4171 5.1301
4 6.6505 4.891
4 7.2328 4.8636
4 7.4017 4.7617
4 7.6063 5.0272
4 7.2716 5.0681
4 6.7997 5.3631
4 6.8852 5.1578
4 7.0737 5.0648

你可能感兴趣的:(#,C++)