Imports System.Collections.Generic
Imports System.IO

''' <summary>
''' Demo form for a simplified candidate-elimination learner.
''' Clicking Button1 reads the sample file, echoes it to TextBox1, and writes
''' the resulting specific hypothesis to TextBox2 and the remaining general
''' hypotheses (one per line) to TextBox3.
''' </summary>
''' <remarks>
''' Expected file layout, as consumed by the code below:
'''   * line 1 is a comma-separated header; its first column is echoed as the
'''     sample name and every further column is an attribute;
'''   * each following line carries the same columns plus ONE extra trailing
'''     field holding the class label ("是" marks a positive example, anything
'''     else is treated as negative);
'''   * reading stops at the first blank line or at end of file.
''' A hypothesis is a String array indexed like a data row: slot 0 is unused,
''' slots 1..attributeCount hold either a concrete value, "?" (accept any
''' value) or Nothing (no value accepted yet).
''' </remarks>
Public Class Form1

    ' Location of the sample file; hard-coded exactly as in the original code.
    Private Const SampleFilePath As String = "e:\mydata.txt"

    ' Marker meaning "any value is acceptable for this attribute".
    Private Const Wildcard As String = "?"

    ' Label text that marks a row as a positive example.
    Private Const PositiveLabel As String = "是"

    ' Field separator. The original listed the same comma twice; one entry is equivalent.
    Private Shared ReadOnly FieldSeparators As Char() = New Char() {","c}

    ''' <summary>
    ''' Runs the learner over the sample file and displays the results.
    ''' </summary>
    ''' <param name="sender">Event source (unused).</param>
    ''' <param name="e">Event data (unused).</param>
    Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
        TextBox3.Text = ""
        TextBox2.Text = ""

        Dim attributeCount As Integer = 0
        Dim specificHypothesis As String() = Nothing
        Dim generalHypotheses As New List(Of String())

        ' Using guarantees the file handle is released even if parsing throws;
        ' the original never closed the reader.
        Using reader As New StreamReader(SampleFilePath, System.Text.Encoding.Default)
            ' The first line is the header.
            Dim headerLine As String = reader.ReadLine()
            TextBox1.Clear()
            If headerLine Is Nothing Then
                ' Empty file: nothing to learn from (previously a NullReferenceException).
                Return
            End If

            Dim headers As String() = headerLine.Split(FieldSeparators)
            ' Column 0 is the sample name, so the upper bound equals the attribute count.
            attributeCount = headers.GetUpperBound(0)

            ' The specific hypothesis starts with every slot = Nothing (accepts nothing);
            ' the general boundary starts with a single all-wildcard hypothesis.
            specificHypothesis = New String(attributeCount) {}
            generalHypotheses.Add(NewWildcardHypothesis(attributeCount))

            ' Echo the header, skipping blank column titles.
            For Each title As String In headers
                If title.Trim() <> "" Then
                    TextBox1.AppendText(title & Space(5))
                End If
            Next

            ' Process the samples one row at a time.
            While Not reader.EndOfStream
                Dim rowLine As String = reader.ReadLine()
                If Trim(rowLine) = "" Then Exit While

                TextBox1.AppendText(vbCrLf)
                Dim fields As String() = rowLine.Split(FieldSeparators)

                ' The field after the last attribute tells positive from negative;
                ' it is not an attribute itself.
                Dim labelText As String = fields(attributeCount + 1)
                Dim isPositive As Boolean = (Trim(labelText) = PositiveLabel)

                TextBox1.AppendText(fields(0) & Space(5))
                For attributeIndex As Integer = 1 To attributeCount
                    Dim value As String = fields(attributeIndex)
                    TextBox1.AppendText(value & Space(5))

                    If isPositive Then
                        GeneralizeSpecific(specificHypothesis, attributeIndex, value)
                        PruneForPositive(generalHypotheses, attributeIndex, value)
                    Else
                        SpecializeForNegative(generalHypotheses, specificHypothesis, attributeIndex, value, attributeCount)
                    End If
                Next
                TextBox1.AppendText(labelText & Space(6))

                If Not isPositive Then
                    ' An all-wildcard hypothesis accepts every sample, including
                    ' the negative one just seen, so it cannot stay.
                    RemoveAllWildcardHypotheses(generalHypotheses, attributeCount)
                End If
            End While
        End Using

        ' Specific hypothesis: a single line in TextBox2.
        Dim specificText As New System.Text.StringBuilder()
        For attributeIndex As Integer = 1 To attributeCount
            specificText.Append(specificHypothesis(attributeIndex) & Space(6))
        Next
        TextBox2.Text = specificText.ToString()

        ' General hypotheses: one line each in TextBox3.
        Dim generalText As New System.Text.StringBuilder()
        For Each hypothesis As String() In generalHypotheses
            For attributeIndex As Integer = 1 To attributeCount
                generalText.Append(hypothesis(attributeIndex) & Space(6))
            Next
            generalText.Append(vbCrLf)
        Next
        TextBox3.Text = generalText.ToString()

        MsgBox("变型空间在特殊假设和一般假设划分出来")
    End Sub

    ' Builds a hypothesis whose every slot (0..attributeCount) is the wildcard.
    Private Shared Function NewWildcardHypothesis(ByVal attributeCount As Integer) As String()
        Dim hypothesis As String() = New String(attributeCount) {}
        For slot As Integer = 0 To attributeCount
            hypothesis(slot) = Wildcard
        Next
        Return hypothesis
    End Function

    ' Positive example: adopt the value if the slot is still unset, otherwise
    ' widen the slot to the wildcard when the value disagrees with it.
    Private Shared Sub GeneralizeSpecific(ByVal specific As String(), ByVal attributeIndex As Integer, ByVal value As String)
        If specific(attributeIndex) Is Nothing Then
            specific(attributeIndex) = value
        ElseIf specific(attributeIndex) <> Wildcard AndAlso specific(attributeIndex) <> value Then
            specific(attributeIndex) = Wildcard
        End If
    End Sub

    ' Positive example: drop every general hypothesis that demands a different
    ' concrete value for this attribute.
    Private Shared Sub PruneForPositive(ByVal general As List(Of String()), ByVal attributeIndex As Integer, ByVal value As String)
        ' Walk backwards so RemoveAt does not shift entries still to be visited.
        For position As Integer = general.Count - 1 To 0 Step -1
            Dim held As String = general(position)(attributeIndex)
            If held <> Wildcard AndAlso held <> value Then
                general.RemoveAt(position)
            End If
        Next
    End Sub

    ' Negative example: drop general hypotheses that match the negative value on
    ' this attribute, then add the single-attribute hypothesis taken from the
    ' specific hypothesis, provided that hypothesis actually excludes the example.
    Private Shared Sub SpecializeForNegative(ByVal general As List(Of String()), ByVal specific As String(), ByVal attributeIndex As Integer, ByVal value As String, ByVal attributeCount As Integer)
        For position As Integer = general.Count - 1 To 0 Step -1
            If value = general(position)(attributeIndex) Then
                general.RemoveAt(position)
            End If
        Next

        Dim boundaryValue As String = specific(attributeIndex)
        ' A wildcard, or the very value the negative example has, cannot exclude
        ' that example. The original added such hypotheses anyway, which
        ' re-inserted the entry removed just above and left the general boundary
        ' covering known negatives.
        If boundaryValue = Wildcard OrElse boundaryValue = value Then
            Return
        End If

        ' If no positive example has been seen yet, boundaryValue is Nothing and
        ' is copied as-is (it is displayed as an empty cell), as before.
        Dim candidate As String() = New String(attributeCount) {}
        For slot As Integer = 1 To attributeCount
            If slot = attributeIndex Then
                candidate(slot) = boundaryValue
            Else
                candidate(slot) = Wildcard
            End If
        Next

        ' Add only if not already present. The original left its "duplicate"
        ' flag True after visiting a deleted slot, so a trailing deleted entry
        ' suppressed the insertion; deleted entries no longer exist here.
        If Not ContainsHypothesis(general, candidate, attributeCount) Then
            general.Add(candidate)
        End If
    End Sub

    ' True when the list already holds a hypothesis equal to the candidate on slots 1..attributeCount.
    Private Shared Function ContainsHypothesis(ByVal general As List(Of String()), ByVal candidate As String(), ByVal attributeCount As Integer) As Boolean
        For Each existing As String() In general
            Dim identical As Boolean = True
            For slot As Integer = 1 To attributeCount
                If Trim(candidate(slot)) <> Trim(existing(slot)) Then
                    identical = False
                    Exit For
                End If
            Next
            If identical Then
                Return True
            End If
        Next
        Return False
    End Function

    ' Removes every hypothesis whose slots 1..attributeCount are all wildcards.
    Private Shared Sub RemoveAllWildcardHypotheses(ByVal general As List(Of String()), ByVal attributeCount As Integer)
        For position As Integer = general.Count - 1 To 0 Step -1
            Dim onlyWildcards As Boolean = True
            For slot As Integer = 1 To attributeCount
                If Trim(general(position)(slot)) <> Wildcard Then
                    onlyWildcards = False
                    Exit For
                End If
            Next
            If onlyWildcards Then
                general.RemoveAt(position)
            End If
        Next
    End Sub

End Class
e:\mydata.txt内容如下:
名称,形状,味道,种类,一般吃法,大小
苹果,圆形,甜味,水果,生吃,小型,是
西瓜,圆形,甜味,水果,生吃,中型,是
苦瓜,长条形,苦味,蔬菜,熟吃,中型,否
香蕉,长条形,甜味,水果,生吃,小型,否
南瓜,圆形,甜味,蔬菜,熟吃,中型,是
荔枝,圆形,甜味,水果,生吃,小型,是
候选消除法用于收集和归纳样本的特征,得出变型空间,然后据此对未知样本进行分类。它建立在对样本属性集进行分析和总结的基础上,基本原理是:
1、设立2个假设,一个是一般假设,一个是特殊假设,通过对每条样本的分析,扩大特殊假设,缩小一般假设。
2、假设就是对样本的每个属性值的设定:可以设定为固定值;也可以设定为未知值“?”,即什么值都可以接受;如果设为Nothing,则表示任何属性值都不接受。
3、每分析一行,就提取每行的样本的各个属性
1)对于正例:
需要删除一般假设中与正例不符合的假设
需要将特殊假设中与正例不符合的属性删除(即改为“?”),开始特殊假设全部为NOTHING,即不接受任何属性值
2)对于反例
需要删除一般假设中与反例符合的假设,然后增加这样的假设:其中一个属性取特殊假设中与反例不一致的属性值,其他属性值均为“?”
3)实质是一般假设说明了反例的特征,特殊假设说明了正例特征