数据挖掘聚类算法--Kmeans

算法采用的数据集为 iris(鸢尾花),可以在 UCI 上下载:http://archive.ics.uci.edu/ml/datasets/Iris

数据集介绍:

1. sepal length in cm

2. sepal width in cm

3. petal length in cm

4. petal width in cm

5. class:

-- Iris Setosa

-- Iris Versicolour

-- Iris Virginica
View Code

直接上代码:

 1 package neugle.kmeans;

 2 

 3 public class IrisModel {

 4     public double Sep_len = 0;

 5     public double Sep_wid = 0;

 6     public double Pet_len = 0;

 7     public double Pet_wid = 0;

 8     public String Iris_type = "";

 9 

10     public boolean equals(Object obj) {

11         IrisModel iris = (IrisModel) obj;

12         return this.Sep_len == iris.Sep_len && this.Sep_wid == iris.Sep_wid

13                 && this.Pet_len == iris.Pet_len && this.Pet_wid == iris.Pet_wid;

14     }

15 }
  1 package neugle.kmeans;

  2 

  3 import java.io.BufferedReader;

  4 import java.io.FileNotFoundException;

  5 import java.io.FileReader;

  6 import java.io.IOException;

  7 import java.util.ArrayList;

  8 import java.util.Iterator;

  9 

 10 public class Kmeans {

 11     private static int k = 3;// 划分簇数目

 12     private static int dataCount = 150;// 文本数量

 13     private static int n = 0;// 迭代次数

 14 

 15     public static void main(String[] args) {

 16         ArrayList<IrisModel> irisList = ReadFile();// 取得文本中数据

 17         ArrayList<IrisModel> beforeRandomPot = new ArrayList<IrisModel>();// 记录上一次质心位置

 18         ArrayList<IrisModel> randomPot = RandomPot(irisList);// 获得随机数据

 19         ArrayList<ArrayList<IrisModel>> kMeansList = null;

 20         while (!CompareRandomPot(beforeRandomPot, randomPot)) {

 21             kMeansList = KMeans(irisList, randomPot);// 进行n次聚类

 22             n++;

 23         }

 24         Print(kMeansList);

 25         System.out.println("迭代了" + n + "次");

 26     }

 27 

 28     // 读取文件中数据

 29     private static ArrayList<IrisModel> ReadFile() {

 30         FileReader read = null;

 31         BufferedReader br = null;

 32         ArrayList<IrisModel> irisList = new ArrayList<IrisModel>();

 33         try {

 34             read = new FileReader("D:\\iris.data");

 35             br = new BufferedReader(read);

 36             String readLine = null;

 37             while ((readLine = br.readLine()) != null) {

 38                 IrisModel iris = new IrisModel();

 39                 String[] agrs = readLine.split(",");

 40                 iris.Sep_len = Double.parseDouble(agrs[0]);

 41                 iris.Sep_wid = Double.parseDouble(agrs[1]);

 42                 iris.Pet_len = Double.parseDouble(agrs[2]);

 43                 iris.Pet_wid = Double.parseDouble(agrs[3]);

 44                 iris.Iris_type = agrs[4];

 45                 irisList.add(iris);

 46             }

 47         } catch (FileNotFoundException e) {

 48             System.out.println("读取文件异常");

 49             irisList = null;

 50         } catch (IOException e) {

 51             System.out.println("读取文件异常");

 52             irisList = null;

 53         } finally {

 54             try {

 55                 br.close();

 56             } catch (IOException e) {

 57                 System.out.println("关闭文件异常");

 58             }

 59         }

 60         return irisList;

 61     }

 62 

 63     // 随机生成初始k个点

 64     private static ArrayList<IrisModel> RandomPot(ArrayList<IrisModel> irisList) {

 65         ArrayList<Integer> initCenter = new ArrayList<Integer>();

 66         ArrayList<IrisModel> randomPot = new ArrayList<IrisModel>();

 67         for (int i = 0; i < k; i++) {

 68             int num = (int) (Math.random() * dataCount);

 69             if (!initCenter.contains(num))

 70                 initCenter.add(num);

 71             else

 72                 i--;

 73         }

 74         Iterator<Integer> i = initCenter.iterator();

 75         while (i.hasNext()) {

 76             randomPot.add(irisList.get(i.next()));

 77         }

 78         return randomPot;

 79     }

 80 

 81     // KMeans主程序

 82     private static ArrayList<ArrayList<IrisModel>> KMeans(

 83             ArrayList<IrisModel> irisList, ArrayList<IrisModel> randomPot) {

 84         ArrayList<ArrayList<IrisModel>> groupNum = new ArrayList<ArrayList<IrisModel>>();

 85         for (int i = 0; i < randomPot.size(); i++) {

 86             ArrayList<IrisModel> list = new ArrayList<IrisModel>();

 87             list.add(randomPot.get(i));

 88             groupNum.add(list);

 89         }

 90         for (int i = 0; i < irisList.size(); i++) {

 91             double temp = Double.MAX_VALUE;

 92             int flag = -1;

 93             for (int j = 0; j < randomPot.size(); j++) {

 94                 double distance = DistanceOfTwoPoint(irisList.get(i),

 95                         randomPot.get(j));

 96                 if (distance < temp) {

 97                     temp = distance;

 98                     flag = j;

 99                 }

100             }

101             groupNum.get(flag).add(irisList.get(i));

102         }

103         // 重新计算质心

104         ArrayList<IrisModel> tempList = CalcCenter(groupNum);

105         randomPot.clear();

106         for (int i = 0; i < tempList.size(); i++) {

107             randomPot.add(tempList.get(i));

108         }

109         return groupNum;

110     }

111 

112     // 计算两点欧氏距离

113     private static double DistanceOfTwoPoint(IrisModel d1, IrisModel d2) {

114         double sum = Math.sqrt(Math.pow((d1.Sep_len - d2.Sep_len), 2)

115                 + Math.pow((d1.Sep_wid - d2.Sep_wid), 2)

116                 + Math.pow((d1.Pet_len - d2.Pet_len), 2)

117                 + Math.pow((d1.Pet_wid - d2.Pet_wid), 2));

118         return sum;

119     }

120 

121     // 重新计算k个簇的质心

122     private static ArrayList<IrisModel> CalcCenter(

123             ArrayList<ArrayList<IrisModel>> c) {

124         ArrayList<IrisModel> cIris = new ArrayList<IrisModel>();

125         Iterator<ArrayList<IrisModel>> i = c.iterator();

126         while (i.hasNext()) {

127             ArrayList<IrisModel> irisList = i.next();

128             IrisModel eIris = new IrisModel();

129             for (int k = 0; k < irisList.size(); k++) {

130                 eIris.Sep_len += irisList.get(k).Sep_len;

131                 eIris.Sep_wid += irisList.get(k).Sep_wid;

132                 eIris.Pet_len += irisList.get(k).Pet_len;

133                 eIris.Pet_wid += irisList.get(k).Pet_wid;

134             }

135             eIris.Sep_len = eIris.Sep_len / irisList.size();

136             eIris.Sep_wid = eIris.Sep_wid / irisList.size();

137             eIris.Pet_len = eIris.Pet_len / irisList.size();

138             eIris.Pet_wid = eIris.Pet_wid / irisList.size();

139             cIris.add(eIris);

140         }

141 

142         return cIris;

143     }

144 

145     // 比较前后两次的质心,以确定是否结束

146     private static Boolean CompareRandomPot(

147             ArrayList<IrisModel> beforeRandomPot, ArrayList<IrisModel> randomPot) {

148         boolean flag = true;

149         for (int i = 0; i < randomPot.size(); i++) {

150             if (beforeRandomPot.size() <= 0

151                     || !beforeRandomPot.contains(randomPot.get(i))) {

152                 flag = false;

153                 break;

154             }

155         }

156         if (flag == false) {

157             if (beforeRandomPot.size() > 0) {

158                 beforeRandomPot.clear();

159             }

160             for (int i = 0; i < randomPot.size(); i++) {

161                 beforeRandomPot.add(randomPot.get(i));

162             }

163         }

164         return flag;

165     }

166 

167     // 打印

168     private static void Print(ArrayList<ArrayList<IrisModel>> kmeansList) {

169         System.out.println("------------------------------------");

170         Iterator<ArrayList<IrisModel>> i = kmeansList.iterator();

171         while (i.hasNext()) {

172             Iterator<IrisModel> ii = i.next().iterator();

173             int n = 0;

174             while (ii.hasNext()) {

175                 n++;

176                 IrisModel irisModel = ii.next();

177                 if (n == 1)

178                     continue;

179                 System.out.println(irisModel.Sep_len + " " + irisModel.Sep_wid

180                         + " " + irisModel.Pet_len + " " + irisModel.Pet_wid

181                         + " " + irisModel.Iris_type);

182             }

183             System.out.println(n - 1);

184             System.out.println("------------------------------------");

185         }

186     }

187 }

实验结果:

------------------------------------

7.0 3.2 4.7 1.4 Iris-versicolor

6.4 3.2 4.5 1.5 Iris-versicolor

5.5 2.3 4.0 1.3 Iris-versicolor

6.5 2.8 4.6 1.5 Iris-versicolor

5.7 2.8 4.5 1.3 Iris-versicolor

6.3 3.3 4.7 1.6 Iris-versicolor

4.9 2.4 3.3 1.0 Iris-versicolor

6.6 2.9 4.6 1.3 Iris-versicolor

5.2 2.7 3.9 1.4 Iris-versicolor

5.0 2.0 3.5 1.0 Iris-versicolor

5.9 3.0 4.2 1.5 Iris-versicolor

6.0 2.2 4.0 1.0 Iris-versicolor

6.1 2.9 4.7 1.4 Iris-versicolor

5.6 2.9 3.6 1.3 Iris-versicolor

6.7 3.1 4.4 1.4 Iris-versicolor

5.6 3.0 4.5 1.5 Iris-versicolor

5.8 2.7 4.1 1.0 Iris-versicolor

6.2 2.2 4.5 1.5 Iris-versicolor

5.6 2.5 3.9 1.1 Iris-versicolor

5.9 3.2 4.8 1.8 Iris-versicolor

6.1 2.8 4.0 1.3 Iris-versicolor

6.3 2.5 4.9 1.5 Iris-versicolor

6.1 2.8 4.7 1.2 Iris-versicolor

6.4 2.9 4.3 1.3 Iris-versicolor

6.6 3.0 4.4 1.4 Iris-versicolor

6.8 2.8 4.8 1.4 Iris-versicolor

6.0 2.9 4.5 1.5 Iris-versicolor

5.7 2.6 3.5 1.0 Iris-versicolor

5.5 2.4 3.8 1.1 Iris-versicolor

5.5 2.4 3.7 1.0 Iris-versicolor

5.8 2.7 3.9 1.2 Iris-versicolor

6.0 2.7 5.1 1.6 Iris-versicolor

5.4 3.0 4.5 1.5 Iris-versicolor

6.0 3.4 4.5 1.6 Iris-versicolor

6.7 3.1 4.7 1.5 Iris-versicolor

6.3 2.3 4.4 1.3 Iris-versicolor

5.6 3.0 4.1 1.3 Iris-versicolor

5.5 2.5 4.0 1.3 Iris-versicolor

5.5 2.6 4.4 1.2 Iris-versicolor

6.1 3.0 4.6 1.4 Iris-versicolor

5.8 2.6 4.0 1.2 Iris-versicolor

5.0 2.3 3.3 1.0 Iris-versicolor

5.6 2.7 4.2 1.3 Iris-versicolor

5.7 3.0 4.2 1.2 Iris-versicolor

5.7 2.9 4.2 1.3 Iris-versicolor

6.2 2.9 4.3 1.3 Iris-versicolor

5.1 2.5 3.0 1.1 Iris-versicolor

5.7 2.8 4.1 1.3 Iris-versicolor

5.8 2.7 5.1 1.9 Iris-virginica

4.9 2.5 4.5 1.7 Iris-virginica

5.7 2.5 5.0 2.0 Iris-virginica

5.8 2.8 5.1 2.4 Iris-virginica

6.0 2.2 5.0 1.5 Iris-virginica

5.6 2.8 4.9 2.0 Iris-virginica

6.3 2.7 4.9 1.8 Iris-virginica

6.2 2.8 4.8 1.8 Iris-virginica

6.1 3.0 4.9 1.8 Iris-virginica

6.3 2.8 5.1 1.5 Iris-virginica

6.0 3.0 4.8 1.8 Iris-virginica

5.8 2.7 5.1 1.9 Iris-virginica

6.3 2.5 5.0 1.9 Iris-virginica

5.9 3.0 5.1 1.8 Iris-virginica

62

------------------------------------

5.1 3.5 1.4 0.2 Iris-setosa

4.9 3.0 1.4 0.2 Iris-setosa

4.7 3.2 1.3 0.2 Iris-setosa

4.6 3.1 1.5 0.2 Iris-setosa

5.0 3.6 1.4 0.2 Iris-setosa

5.4 3.9 1.7 0.4 Iris-setosa

4.6 3.4 1.4 0.3 Iris-setosa

5.0 3.4 1.5 0.2 Iris-setosa

4.4 2.9 1.4 0.2 Iris-setosa

4.9 3.1 1.5 0.1 Iris-setosa

5.4 3.7 1.5 0.2 Iris-setosa

4.8 3.4 1.6 0.2 Iris-setosa

4.8 3.0 1.4 0.1 Iris-setosa

4.3 3.0 1.1 0.1 Iris-setosa

5.8 4.0 1.2 0.2 Iris-setosa

5.7 4.4 1.5 0.4 Iris-setosa

5.4 3.9 1.3 0.4 Iris-setosa

5.1 3.5 1.4 0.3 Iris-setosa

5.7 3.8 1.7 0.3 Iris-setosa

5.1 3.8 1.5 0.3 Iris-setosa

5.4 3.4 1.7 0.2 Iris-setosa

5.1 3.7 1.5 0.4 Iris-setosa

4.6 3.6 1.0 0.2 Iris-setosa

5.1 3.3 1.7 0.5 Iris-setosa

4.8 3.4 1.9 0.2 Iris-setosa

5.0 3.0 1.6 0.2 Iris-setosa

5.0 3.4 1.6 0.4 Iris-setosa

5.2 3.5 1.5 0.2 Iris-setosa

5.2 3.4 1.4 0.2 Iris-setosa

4.7 3.2 1.6 0.2 Iris-setosa

4.8 3.1 1.6 0.2 Iris-setosa

5.4 3.4 1.5 0.4 Iris-setosa

5.2 4.1 1.5 0.1 Iris-setosa

5.5 4.2 1.4 0.2 Iris-setosa

4.9 3.1 1.5 0.1 Iris-setosa

5.0 3.2 1.2 0.2 Iris-setosa

5.5 3.5 1.3 0.2 Iris-setosa

4.9 3.1 1.5 0.1 Iris-setosa

4.4 3.0 1.3 0.2 Iris-setosa

5.1 3.4 1.5 0.2 Iris-setosa

5.0 3.5 1.3 0.3 Iris-setosa

4.5 2.3 1.3 0.3 Iris-setosa

4.4 3.2 1.3 0.2 Iris-setosa

5.0 3.5 1.6 0.6 Iris-setosa

5.1 3.8 1.9 0.4 Iris-setosa

4.8 3.0 1.4 0.3 Iris-setosa

5.1 3.8 1.6 0.2 Iris-setosa

4.6 3.2 1.4 0.2 Iris-setosa

5.3 3.7 1.5 0.2 Iris-setosa

5.0 3.3 1.4 0.2 Iris-setosa

50

------------------------------------

6.9 3.1 4.9 1.5 Iris-versicolor

6.7 3.0 5.0 1.7 Iris-versicolor

6.3 3.3 6.0 2.5 Iris-virginica

7.1 3.0 5.9 2.1 Iris-virginica

6.3 2.9 5.6 1.8 Iris-virginica

6.5 3.0 5.8 2.2 Iris-virginica

7.6 3.0 6.6 2.1 Iris-virginica

7.3 2.9 6.3 1.8 Iris-virginica

6.7 2.5 5.8 1.8 Iris-virginica

7.2 3.6 6.1 2.5 Iris-virginica

6.5 3.2 5.1 2.0 Iris-virginica

6.4 2.7 5.3 1.9 Iris-virginica

6.8 3.0 5.5 2.1 Iris-virginica

6.4 3.2 5.3 2.3 Iris-virginica

6.5 3.0 5.5 1.8 Iris-virginica

7.7 3.8 6.7 2.2 Iris-virginica

7.7 2.6 6.9 2.3 Iris-virginica

6.9 3.2 5.7 2.3 Iris-virginica

7.7 2.8 6.7 2.0 Iris-virginica

6.7 3.3 5.7 2.1 Iris-virginica

7.2 3.2 6.0 1.8 Iris-virginica

6.4 2.8 5.6 2.1 Iris-virginica

7.2 3.0 5.8 1.6 Iris-virginica

7.4 2.8 6.1 1.9 Iris-virginica

7.9 3.8 6.4 2.0 Iris-virginica

6.4 2.8 5.6 2.2 Iris-virginica

6.1 2.6 5.6 1.4 Iris-virginica

7.7 3.0 6.1 2.3 Iris-virginica

6.3 3.4 5.6 2.4 Iris-virginica

6.4 3.1 5.5 1.8 Iris-virginica

6.9 3.1 5.4 2.1 Iris-virginica

6.7 3.1 5.6 2.4 Iris-virginica

6.9 3.1 5.1 2.3 Iris-virginica

6.8 3.2 5.9 2.3 Iris-virginica

6.7 3.3 5.7 2.5 Iris-virginica

6.7 3.0 5.2 2.3 Iris-virginica

6.5 3.0 5.2 2.0 Iris-virginica

6.2 3.4 5.4 2.3 Iris-virginica

38

------------------------------------

迭代了16次
View Code

你可能感兴趣的:(数据挖掘)