// Clustreing_Algorithm.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
#define FILENAME_LENGTH 20//size of the character buffer that holds the input file name
#define BUFFER_LENGTH 50//size of the character buffer that holds one line read from the file
#define CENTER_LENGTH 10//capacity of the array that stores the cluster-center indices
// One sample of the data set together with its clustering state.
// Every member has a default initialiser so that a default-constructed
// Data never carries indeterminate values.
struct Data
{
    int type = 0;   // class label stored in the input file
    float x = 0.0f; // first feature
    float y = 0.0f; // second feature
    int result = 0; // cluster index assigned by the clustering step
    int change = 0; // 1 if `result` was modified during the current pass, 0 otherwise
};
// Record pairing an index with a distance. According to the original note it was
// intended for the max-distance / min-distance computation and is now obsolete;
// nothing in the visible code uses it.
struct Find
{
int index; // presumably the index of a sample in the data set - TODO confirm
double distance; // distance value associated with that index
};
vector dataSet;//存放所有的数据集
//vector findStruct;
int center[CENTER_LENGTH];//存放聚类中心下标的数组
/*从文件中读取数据*/
void readData(char *filename)
{
int i = 0;
int type;
float x;
float y;
char buffer[BUFFER_LENGTH];
Data datatemp;
ifstream file(filename);
if (!file.is_open())
{
cout << "Error:open the file!" << endl;
exit(1);
}
while (!file.eof())
{
file.getline(buffer,BUFFER_LENGTH);
sscanf_s(buffer, "%d %f %f", &type, &x, &y);
datatemp.type = type;
datatemp.x = x;
datatemp.y = y;
dataSet.push_back(datatemp);
}
file.close();
}
/*数据输出,返回值为数据的行数*/
int outputData()
{
cout << endl;
int i = 0;
cout << setw(6) << "序号" << setw(10) <<"文件类型"<< setw(10) << "特征1"<< setw(18) << "特征2" << setw(10) << "分类" << endl;
for (i = 0; i < dataSet.size(); i++)
{
cout << setw(6) << i + 1 << ":" << setw(4) << dataSet.at(i).type << setw(16) << dataSet.at(i).x << setw(16) << dataSet.at(i).y << setw(10) << dataSet.at(i).result << endl;
}
return i;
}
/* Euclidean distance between two samples, computed from their x and y features. */
double getDistance(Data a,Data b)
{
    // The features are floats, so the differences are formed in float
    // exactly as in a single combined expression.
    const float deltaX = a.x - b.x;
    const float deltaY = a.y - b.y;
    return sqrt(deltaX * deltaX + deltaY * deltaY);
}
/*
 * Returns the index of the largest element among the first `size` elements
 * of `array`. When several elements share the maximum, the lowest index wins.
 *
 * `size` is clamped to the real length of the vector, so the function never
 * reads past the end. Returns 0 when `size` is not positive and the vector is
 * non-empty, and -1 when the vector is empty (there is no valid index).
 */
int getMax(const std::vector<double> &array, int size)
{
    if (array.empty())
    {
        return -1;
    }

    std::size_t limit = size > 0 ? static_cast<std::size_t>(size) : 0;
    if (limit > array.size())
    {
        limit = array.size();
    }

    std::size_t best = 0;
    for (std::size_t i = 1; i < limit; ++i)
    {
        if (array[i] > array[best])
        {
            best = i;
        }
    }
    return static_cast<int>(best);
}
/*最大最小距离算法进行聚类,参数是数据的行数,返回值是K-means的K值大小*/
int maxMin(int line)
{
int flag = 1;
int chongfu = 0;//标志位,判断聚类中心是否重复
double a = 0.4;//给定参数a,0 distancei0(line);//变长数组存放所有点和第一个聚类中心之间的距离
vector distancei1(line);//变长数组存放所有点和第二个聚类中心之间的距离
vector distanceMin(line);//临时变量
vector> matrix;//二维矩阵存储每个模式到聚类中心的距离
double distanceMax,min,max;//临时变量
double diatance12;//聚类中心1和聚类中心2之间的距离
int index = 0;//下标
int centerP = 0;//指示当前元素下标
//选取第一个样本为聚类中心,计算其他点和第一个点之间的距离
for (int i = 0; i < dataSet.size(); i++)
{
distancei0[i] = getDistance(dataSet.at(i), dataSet.at(0));
}
matrix.push_back(distancei0);
//计算距离第一个点最远的那个点
distanceMax = distancei0[0];
center[0] = 0;
for (int i = 0; i < distancei0.size(); i++)
{
if (distancei0[i] > distanceMax)
{
distanceMax = distancei0[i];
index = i;
}
}
center[++centerP] = index;//第二个聚类中心
diatance12 = getDistance(dataSet.at(0), dataSet.at(index));//计算聚类中心1和聚类中心2之间的距离
//计算所有的点与第二个聚类中心之间的距离
for (int i = 0; i < distancei1.size(); i++)
{
distancei1[i] = getDistance(dataSet.at(i), dataSet.at(center[1]));
}
matrix.push_back(distancei1);
//计算全部聚类中心
while (1)
{
//最小距离
for (int j = 0; j < line; j++)
{
min = matrix.at(0).at(j);//初始化最小值
for (int i = 0; i < matrix.size(); i++)
{
if (matrix.at(i).at(j) < min)
{
min = matrix.at(i).at(j);
}
}
distanceMin[j] = min;
}
//最大距离
index = getMax(distanceMin, line);
//判断是否产生新的聚类中心
if ((distanceMin[index] > (a*diatance12)))
{
center[++centerP] = index;//新的聚类中心
//若产生新的聚类中心,则需要计算其他所有样本到新聚类中心的距离
vector newdistance(line);
for (int i = 0; i < line; i++)
{
newdistance[i] = getDistance(dataSet.at(i), dataSet.at(index));
}
//新产生的向量加入矩阵
matrix.push_back(newdistance);
}
else break;
}
//输出聚类中心
index = 0;
cout < array, int size)
{
double min = array[0];
int index = 0, i = 0;
for (i = 0; i < size; i++)
{
if (array[i] < min)
{
min = array[i];
index = i;
}
}
return index;
}
/* Obsolete, kept for reference only (this whole function is inside a comment and is
   not compiled). It counts, for every j in [0, k), how many of the first `line`
   samples have type == j. Parameters: the K of K-means and the data set size.
vector<int> getCount(int k,int line)
{
vector<int> count(k);
for (int i = 0; i < line; i++)
{
for (int j = 0; j < k; j++)
{
if (dataSet.at(i).type == j)
{
count[j] += 1;
}
}
}
return count;
}
*/
/*K-means algorithm; parameters: the K value and the number of data rows*/
vector Kmeans(int k,int line)
{
int index = 0;
double x=0;
double y=0;
int flag = 1;
//vector > result(k);
vector count(k);//计算每个类别的个数
vector distanceToClassHeart(k);//每个模式距离K个类心的距离
vector newCenter(k);//类心
//newCenter初始化
for (int i = 0; i < k; i++)
{
newCenter[i] = dataSet.at(center[i]);
}
while (flag==1)
{
//change初始化
for (int i = 0; i < line; i++)
{
dataSet.at(i).change = 0;
}
//计算所有模式和新的类心之间的距离
for (int i = 0; i < line; i++)
{
for (int j = 0; j < k; j++)
{
distanceToClassHeart[j] = getDistance(dataSet.at(i), newCenter.at(j));
}
index = getMin(distanceToClassHeart, k);//获取距离当前模式欧氏距离最近的类心的下标
if (dataSet.at(i).result != index)//每次大循环的时候都先初始化所有data中change的数值为0,如果本次发生改变就修改change的数值
{
dataSet.at(i).change = 1;
}
dataSet.at(i).result = index;//分类
//count[index] += 1;
//result.at(index).resize(count[index]);
//int p = count[index] - 1;
//int p=result.at(index).size()-1;
//result.at(index).at(p) = i;//ok
}
//求得新的类心
for (int i = 0; i < k; i++)
{
x = 0; y = 0; count[i] = 0;
for (int j = 0; j < line; j++)
{
if (dataSet.at(j).result == i)
{
x += dataSet.at(j).x;
y += dataSet.at(j).y;
count[i] += 1;
}
}
newCenter[i].x = x / count[i];
newCenter[i].y = y / count[i];
}
/*
for (int i = 0; i < k; i++)
{
x = 0;
y = 0;
for (int j = 0; j < count[i]; j++)
{
x += dataSet.at(result[i][j]).x;//result长度在第三次迭代时变成了0???溢出。。。
y += dataSet.at(result[i][j]).y;
}
Data newclassheart;
newclassheart.x = x / count[i];
newclassheart.y = y / count[i];
newCenter[i] = newclassheart;
count[i] = 0;//清零计数数组
}*/
//计算所有模式和新的类心之间的距离
//vector > result(k);
/*
for (int i = 0; i < line; i++)
{
for (int j = 0; j < k; j++)
{
distanceToClassHeart[j] = getDistance(dataSet.at(i), newCenter.at(j));
}
index = getMin(distanceToClassHeart, k);//获取距离当前模式欧氏距离最近的类心的下标
if (dataSet.at(i).result != index)//每次大循环的时候都先初始化所有data中change的数值为0,如果本次发生改变就修改change的数值
{
dataSet.at(i).change = 1;
}
dataSet.at(i).result = index;//分类
//result[index][count[index] - 1] = i;//当前模式分类情况写入result二维数组
count[index] += 1;
//result.at(index).resize(count[index]);
//int p = count[index] - 1;
//int p = result.at(index).size() - 1;
//result.at(index).at(p) = i;
}*/
bool sum = false;
for (int i = 0; i < line; i++)
{
if (dataSet.at(i).change == 1)
{
sum = true;
break;
}
}
if (!sum)
flag=0;
}
return newCenter;
}
void outputResult()
{
cout << endl;
int i = 0;
cout << setw(6) << "序号" << setw(10) << "特征1" << setw(18) << "特征2" << setw(10) << "分类" << endl;
for (i = 0; i < dataSet.size(); i++)
{
cout << setw(6) << i + 1 << ":" << setw(16) << dataSet.at(i).x << setw(16) << dataSet.at(i).y << setw(10) << dataSet.at(i).result << endl;
}
}
/*
 * Obsolete (marked as such in the original source; no visible caller):
 * prints how many samples each of the k clusters contains.
 *
 * Parameter `k`:     number of clusters reported in the heading.
 * Parameter `count`: count[i] is the number of samples in cluster i; only the
 *                    entries that actually exist are printed, so a vector
 *                    shorter than k is never read out of range.
 */
void outputResult(int k, const std::vector<int> &count)
{
    std::cout << std::endl << "共分成" << k << "类:" << std::endl;
    for (int i = 0; i < k && i < static_cast<int>(count.size()); i++)
    {
        std::cout << "第" << i + 1 << "类共有:" << std::setw(6) << count[i] << "个模式" << std::endl;
    }
    std::cout << std::endl;
}
/*类间距离,使用重心距离法*/
void centerToCenter(vector array)
{
int k = array.size();
vector> distance(k);
/*输出Kmeans之后的质心*/
cout << endl;
for (int i = 0; i < array.size(); i++)
{
cout << "第"< arrayCount;
//程序提示
char filename[FILENAME_LENGTH];
cout << "请输入要打开的文件名,如:4k2_far.txt" << endl;
cin >> filename;
//读入数据
readData(filename);
//输出数据
line=outputData();
//最大最小距离算法
k=maxMin(line);
//K-means算法聚类
arrayCount=Kmeans(k,line);
//输出聚类之后的数据
outputResult();
//输出类间距离
centerToCenter(arrayCount);
system("pause");
return 0;
}
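// Sample input file (e.g. 4k2_far.txt). The rows below are data, not C++ source; each row is: <type> <feature 1> <feature 2>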
1 2.7266 3.0102
1 3.1304 2.4673
1 3.0492 2.525
1 3.226 3.1649
1 2.7223 2.5713
1 3.2862 2.8255
1 3.111 3.2994
1 3.2398 2.9681
1 2.8661 2.5533
1 3.2616 3.4902
1 1.99 3.2137
1 2.7017 2.61
1 3.0131 3.5208
1 2.8395 2.6816
1 2.9831 3.1657
1 3.7537 2.6608
1 3.0544 2.6474
1 3.3826 3.2356
1 3.2362 3.1535
1 3.0911 2.7883
1 2.4905 2.8723
1 2.8458 2.7137
1 2.7267 3.1528
1 3.1643 3.0671
1 3.3906 2.3585
1 2.1004 3.0724
1 2.8911 3.2043
1 2.6157 2.1725
1 3.1961 3.1735
1 1.7841 3.0763
1 3.4923 3.4455
1 3.4772 3.3968
1 3.3189 3.5495
1 3.2798 2.1895
1 2.2937 3.3527
1 2.8161 3.2286
1 2.3536 3.5656
1 3.3436 2.9659
1 3.0465 2.5304
1 3.9403 3.0006
1 2.9572 2.5322
1 3.1434 2.7548
1 2.9806 3.0031
1 3.6446 2.7736
1 2.8164 3.6278
1 3.4821 4.0864
1 3.6661 2.6847
1 2.3413 3.4814
1 3.2312 2.7373
1 2.6234 2.5361
1 3.2563 2.973
1 2.8906 2.0936
1 3.1462 3.7418
1 2.6438 2.7712
1 3.5794 2.373
1 2.3408 3.4678
1 2.9322 3.1776
1 3.2282 3.4508
1 2.8451 2.6851
1 3.1893 2.7909
1 2.7477 2.3049
1 2.5491 3.6024
1 3.3117 3.0164
1 3.0925 3.1162
1 2.884 2.7418
1 3.118 2.6412
1 3.4545 2.0397
1 2.4624 3.095
1 2.5876 2.6469
1 3.0391 3.5873
1 2.6821 3.3018
1 1.9979 2.7849
1 3.6046 4.4167
1 2.8052 2.3741
1 3.3704 3.5485
1 2.5016 2.8316
1 2.4297 2.3209
1 3.5564 2.5427
1 2.8552 2.5938
1 2.5227 2.2445
1 3.6131 3.0072
1 3.5096 3.6762
1 2.8118 2.9116
1 3.4572 3.1999
1 3.2817 2.5862
1 3.1585 2.6506
1 3.3324 2.3196
1 3.282 3.3363
1 2.9604 3.1444
1 2.7604 2.7121
1 2.953 2.5977
1 2.518 3.3706
1 3.1038 3.2042
1 2.6108 3.2932
1 2.7133 3.5879
1 3.022 3.2819
1 3.2887 3.6587
1 2.7811 2.5675
1 3.1395 2.8455
1 2.8075 2.9971
1 2.5235 3.3553
1 3.3622 3.1813
1 2.6712 4.1989
1 3.1562 3.9358
1 2.9157 3.2074
1 2.3513 2.7011
1 3.2596 2.7134
1 2.7007 2.8752
1 3.0785 2.813
1 3.5539 2.7665
1 3.4948 2.4448
1 3.4598 2.0789
1 2.4891 2.8472
1 3.3896 2.505
1 2.5973 2.8949
1 3.5049 3.6834
1 2.559 2.9035
1 2.1629 3.0291
1 2.5993 2.5164
1 3.1497 3.167
1 2.7022 3.2358
1 3.0475 3.2566
1 3.2959 3.3595
1 2.8036 2.3961
1 2.9221 3.2034
1 3.097 3.7377
1 2.7234 3.4401
1 2.6777 2.7403
1 2.1883 2.8882
1 3.0051 3.3939
1 2.7537 2.5876
1 2.6726 3.4831
1 3.3822 2.4474
1 2.6018 2.4145
1 3.2896 3.308
1 2.7854 2.3219
1 2.574 2.3544
1 2.7345 2.6018
1 4.2289 3.058
1 2.6282 3.4221
1 3.3936 3.1938
1 2.4638 3.8204
1 3.0994 2.4435
1 3.508 3.7114
1 2.6488 2.0721
1 2.2956 3.4115
1 1.8437 3.1186
1 2.83 2.7157
1 2.7668 3.8732
2 7.4256 8.266
2 8.703 7.4874
2 8.4307 7.7491
2 8.0501 8.6516
2 8.5896 8.4566
2 8.0519 8.3274
2 8.4173 7.5257
2 8.579 7.8808
2 8.0708 7.9862
2 8.0786 8.4343
2 8.2881 8.821
2 8.2579 7.8648
2 7.5377 7.2675
2 7.7397 7.5936
2 8.3348 7.9979
2 8.0718 8.6614
2 7.3806 8.1896
2 7.6868 7.733
2 8.2079 7.3411
2 7.7795 7.8079
2 9.2059 7.8076
2 8.5043 8.3472
2 7.4194 7.2467
2 7.6992 7.892
2 7.9446 7.872
2 7.2857 7.1511
2 8.0719 8.0038
2 8.2507 7.8614
2 8.4375 7.5687
2 7.9359 7.9991
2 8.0518 8.7005
2 8.2761 7.984
2 8.7712 7.7401
2 7.9218 7.7272
2 8.475 7.9472
2 8.3559 8.2356
2 8.6742 7.5134
2 7.6074 8.1905
2 7.8984 8.1305
2 8.0892 7.6176
2 8.4908 8.1264
2 7.2717 7.2312
2 7.6027 7.9745
2 8.6869 8.1516
2 7.3566 8.2209
2 8.0468 8.8111
2 8.2436 7.4516
2 8.7382 8.6116
2 7.7793 8.0716
2 7.9856 8.3365
2 8.8393 7.5225
2 7.8965 7.8471
2 8.5374 7.8589
2 8.7422 8.0283
2 8.3421 8.6652
2 7.8882 8.2419
2 7.9541 8.6037
2 8.3586 8.0111
2 7.853 7.6305
2 8.0515 8.1975
2 8.497 8.0638
2 8.8102 7.2967
2 8.4419 7.8188
2 8.2358 7.6146
2 8.1958 7.9179
2 8.1346 7.4925
2 7.0101 8.7125
2 8.2017 7.9962
2 8.1583 8.3481
2 8.1205 7.9192
2 7.6749 7.6971
2 7.5198 8.2414
2 7.7911 8.4015
2 7.8028 8.2066
2 8.3084 7.3386
2 7.756 7.919
2 8.0038 8.0903
2 7.8845 7.1217
2 7.7541 7.8179
2 7.3147 8.3037
2 8.0674 8.2433
2 7.8393 8.482
2 8.1533 8.1583
2 7.8958 7.3012
2 8.0175 8.4396
2 7.6834 7.5814
2 8.6523 8.0479
2 7.6882 7.7331
2 7.8279 8.0414
2 7.6931 7.7269
2 8.2605 8.4346
2 7.6377 7.6857
2 8.2584 8.5359
2 8.0147 8.0012
2 8.7019 7.5311
2 7.405 7.5773
2 7.4772 7.7605
2 8.2869 8.7212
2 8.5142 6.4754
2 7.8035 8.2711
2 8.9912 8.1837
2 8.2268 8.7324
2 7.9031 7.3747
2 8.7893 8.3092
2 8.1417 7.9259
2 8.2175 7.8444
2 8.6563 8.506
2 7.6683 7.4926
2 8.6393 7.0973
2 8.5349 7.8904
2 7.8057 8.3872
2 8.3471 7.6245
2 7.8746 7.8769
2 8.3337 7.8474
2 8.0863 8.2693
2 7.7654 7.836
2 8.3298 8.5828
2 7.8341 7.3557
2 7.0727 8.0753
2 8.6399 7.0368
2 7.9424 7.8251
2 8.0321 7.0214
2 8.4529 8.317
2 8.7172 7.8672
2 7.3876 8.1845
2 8.6239 7.6609
2 8.502 8.2946
2 7.5958 8.3769
2 8.3163 8.0605
2 7.7711 8.05
2 8.8454 8.4523
2 8.4656 8.4876
2 8.604 7.6903
2 8.2437 7.8624
2 7.6448 8.5157
2 8.3876 8.3474
2 7.9066 8.2761
2 7.7884 7.1085
2 7.5304 7.6864
2 8.518 9.0936
2 7.3841 7.5885
2 7.8675 8.1986
2 8.1031 8.0378
2 7.9853 7.8377
2 7.0016 8.2089
2 8.5393 8.3508
2 7.2779 8.4457
3 3.2709 6.1369
3 2.9987 5.9714
3 2.5602 5.9316
3 2.9639 6.0448
3 3.2984 6.1485
3 3.2752 6.3167
3 3.5049 6.3118
3 2.9292 6.2216
3 2.7027 5.855
3 2.891 6.0381
3 2.9622 6.3197
3 3.3382 6.3632
3 3.0187 5.7204
3 3.0559 6.5856
3 2.7205 5.865
3 2.6009 6.1153
3 2.7392 5.6576
3 3.1698 6.0758
3 3.3396 6.3712
3 2.9096 5.522
3 3.105 6.1714
3 2.9602 6.1118
3 3.4558 6.1802
3 2.8195 5.5693
3 3.1139 5.8101
3 3.0492 5.9645
3 3.451 5.7826
3 3.3912 6.195
3 2.8793 5.9447
3 2.9069 6.069
3 2.5756 5.6331
3 2.9775 6.2627
3 2.6917 5.5368
3 3.0577 6.0647
3 2.7511 5.8085
3 2.535 5.0883
3 2.3498 6.2257
3 3.011 6.413
3 3.1564 6.3013
3 2.7182 5.9668
3 2.7663 5.8458
3 2.4842 5.7768
3 3.4016 5.8005
3 3.5514 5.8075
3 3.356 5.8858
3 3.1057 6.3061
3 3.4432 5.9784
3 3.541 5.8704
3 3.1079 6.0222
3 2.9206 6.4314
3 2.8793 6.2424
3 3.1371 5.6366
3 3.2722 6.1326
4 6.6044 5.4906
4 6.8257 4.8702
4 7.0364 4.8652
4 7.2246 4.74
4 7.1077 5.4081
4 6.6286 5.0261
4 6.8289 4.9542
4 6.4988 4.8895
4 6.811 5.1143
4 6.5591 4.817
4 7.0664 5.1008
4 6.81 5.2355
4 7.5544 5.1802
4 7.1528 4.8791
4 6.4411 5.2578
4 6.7477 4.8343
4 7.4801 4.8656
4 6.2978 4.3821
4 6.7147 5.4213
4 6.4965 4.8323
4 7.4137 4.5293
4 7.2749 5.2806
4 7.543 5.2511
4 6.7118 5.2895
4 7.1741 5.2429
4 6.914 5.0772
4 7.5856 5.3146
4 6.7756 5.1347
4 7.4576 4.4974
4 6.7791 6.0667
4 7.3444 4.9963
4 6.8172 5.011
4 6.6711 4.8114
4 7.1353 5.3525
4 6.5235 5.2443
4 6.8068 5.5866
4 6.8967 4.8194
4 7.5556 4.6326
4 6.842 4.7881
4 6.8945 5.2523
4 7.3522 5.1422
4 7.0194 4.9704
4 7.4171 5.1301
4 6.6505 4.891
4 7.2328 4.8636
4 7.4017 4.7617
4 7.6063 5.0272
4 7.2716 5.0681
4 6.7997 5.3631
4 6.8852 5.1578
4 7.0737 5.0648