学习笔记(4)-社区发现评价指标

目前使用的主要有:Q(Modulartiy),Jaccard指数与Fsame值,NMI也是常用指标。

c语言实现:

//当i和J属于同一个社团时,E函数等于l,否则等于0. 故只需计算同一社区的函数值
double Modulartiy(int * cluster_assignment, int ** M, int vertices)
{
    int i, j, k;
    int maxlable = cluster_assignment[0];                   //最大标号
    for (i = 1; i < vertices; i++)
    {
        if (maxlable < cluster_assignment[i])
            maxlable = cluster_assignment[i];
    }
    int TotalLinks;                                                      //总边数 (m)
    TotalLinks= SumSumMartex(M, vertices, vertices)/2;  
    double Q = 0;                                                      //Q函数
    vector <int> index;
    for (i = 1; i <= maxlable; i++)
    {
        FindIndexInVector(cluster_assignment, index, vertices, i); 
        unsigned  len = index.size();
        double li = 0;
        //社区内部边的边数 (I)
        for (j = 0; j < len; j++)
            for (k = 0; k < len; k++)
                li += M[index[j]][index[k]];
        li = li / 2;
        //社区内所有顶点的度之和 (D)
        double di = 0;
        for (j = 0; j < len; j++)
            for (k = 0; k < vertices; k++)
                di = di + M[index[j]][k];
        Q = Q + (li - (di * di) /(TotalLinks*4) );
        index.clear();
    }
    Q = Q / TotalLinks;
    return Q ;
}
/************找出两种分类结果中相同标签的元素个数**********/
double FindjaccardIndex(int* A, int* B, int vertices)
{
    int a,b,c;
    a=0; b=0; c=0;
    for(int i=0; i<vertices; i++)
    {
        for(int j=0; j<vertices; j++)
        {
            if((A[i]==A[j])&&(B[i]==B[j])) a++;
            if((A[i]==A[j])&&(B[i]!=B[j])) b++;
            if((A[i]!=A[j])&&(B[i]==B[j])) c++;
        }
    }
    return a/(double)(a+b+c);
}
/********************计算两种分区结果的Fsame值***************/
double FsameIndex(int*A, int*B, int As, int Bs)
{
    int **S;                                        //生成As*Bs矩阵
    int i, j;
    S = new int * [As];
    for ( i = 0; i < As; i++ )
    {
        S[i] = new int [Bs];
    }
    for ( i = 0; i < As; i++)
    {
        for ( j=0; j<Bs;  j++)
            S[i][j]=0;
    }
    for (j=0; j<Vertices; j++)
    {
        S[A[j]-1][B[j]-1]++;
    }
    //计算fsame的值
    double fsame;
    //辅助队列
    int* r = new int[As];
    for ( i = 0; i < As; i++ ) r[i]=-1;
    int* c = new int[Bs];
    for ( i = 0; i < Bs; i++ ) c[i]=-1;
    //统计每行及每列最大的数
    for ( i = 0; i < As; i++)
    {
        for ( j = 0; j < Bs;  j++)
        {
            if (S[i][j]>r[i]) r[i]=S[i][j];
            if (S[i][j]>c[j]) c[j]=S[i][j];
        }
    }
    //求和
    int x=0, y=0;
    for ( i = 0; i < As; i++ )  x+=r[i];
    for ( i = 0; i < Bs; i++ )  y+=c[i];

    fsame=50.0*(x+y)/(double)Vertices;
    return fsame;
}

Python 实现

def Modulartiy(A, coms, sums):
    Q = 0.0
    for eachc in coms:
        li = 0
        for eachp in coms[eachc]:
            for eachq in coms[eachc]:
                li += A[eachp][eachq]
        li /= 2
        di = 0
        for eachp in coms[eachc]:
            for eachq in range(vertices):
                di += A[eachp][eachq]
        Q = Q + (li - (di * di) /(sums*4))
    Q = Q / float(sums)
    return Q

def JaccardIndex(cluA,cluB):
    a, b, c = 0
    for i in range(vertices):
        for j in range(vertices):
            if cluA[i] == cluA[j] and cluB[i] == cluB[j]:
                a += 1
            if cluA[i] == cluA[j] and cluB[i] == cluB[j]:
                b += 1
            if cluA[i] == cluA[j] and cluB[i] == cluB[j]:
                c += 1
    return a/(double)(a+b+c)

def FsameIndex(cluA,cluB):
    S = mat([[0 for i in range(len(cluB))] for j in range(len(cluA))])
    for i in range(vertices):
        S[cluA[i]][cluB[i]] = 1
    r = sum(S.max(0))
    c = sum(S.max(1))
    fsame = 50.0*(r+c)/float(Vertices)
    return fsame

    def NMI(cluA,cluB):
    #混淆矩阵
    cmat = [[0 for i in range(len(cluA))] for j in range(len(cluB))]
    i = 0
    j = 0
    for eacha in cluA:
        for eachb in cluB:
            cmat[i][j] = len(set(cluA[eacha]) & set(cluB[eachb]))
            j += 1
        i += 1
        j = 0     
    #print cmat
    #the nmi_numerator part
    nmi_numerator = 0.0
    for i in range(len(cluA)):
        for j in range(len(cluB)):
            if (cmat[i][j]!=0):
                row = 0
                column = 0
                for k in range(len(cluB)):
                    row = row + cmat[i][k]
                for l in range(len(cluA)):
                    column = column + cmat[l][j]
                nmi_numerator = nmi_numerator + cmat[i][j] * log10((cmat[i][j] * vertices)/float(row * column))
    nmi_numerator = -2 * nmi_numerator
    #the denominator part
    nmi_denominator1 = 0.0
    nmi_denominator2 = 0.0
    nmi = 0.0
    for i in range(len(cluA)):
        row = 0
        for k in range(len(cluB)):
            row = row + cmat[i][k]
        if(row != 0):
            nmi_denominator1 = nmi_denominator1 + row * log10(row / float(vertices))
    for j in range(len(cluB)):
        column = 0
        for l in range(len(cluA)):
            column = column + cmat[l][j];
        if(column != 0):
            nmi_denominator2 = nmi_denominator2 + column * log10(column / float(vertices))
    nmi_denominator = nmi_denominator1 + nmi_denominator2
    print nmi_numerator,nmi_denominator
    if(nmi_denominator != 0):
        nmi = nmi_numerator/float(nmi_denominator)
    return nmi

附:NMI指数C++:

double normalized_mutual_information(int * clu_assignment1,int * clu_assignment2, int length)
{
    int num_nodes = length;
    int clu_num1 = clu_assignment1[0];
    int clu_num2 = clu_assignment2[0];
    for (int m = 1; m < num_nodes; m++)
    {
        if (clu_num1 < clu_assignment1[m])
            clu_num1 = clu_assignment1[m];
        if (clu_num2 < clu_assignment2[m])
            clu_num2 = clu_assignment2[m];
    }
    // Compute the confusion matrix
    int** cmat;
    cmat = new int * [clu_num1];
    for (int i = 0; i < clu_num1; i++ )
    {
        cmat[i] = new int [clu_num2];
    }
    for (int i = 0; i < clu_num1; i++ )
        for (int j = 0; j < clu_num2; j++ )
        {
            cmat[i][j] = 0;
        }
    for (int i = 0; i < clu_num1; i++ )
    {
        vector <int> index1;
        for (int k = 0; k < num_nodes; k++ )
        {
            if (clu_assignment1[k] == i + 1)
                index1.push_back(k);
        }
        for (int j = 0; j < clu_num2; j++ )
        {
            vector <int> index2;
            for (int k = 0; k < num_nodes; k++ )
            {
                if (clu_assignment2[k] == j + 1)
                    index2.push_back(k);
            }
            int numtemp = 0;
            for (unsigned int m = 0; m < index1.size(); m++ )
            {
                for(unsigned int n = 0; n < index2.size(); n++ )
                {
                    if (index1[m] == index2[n])
                        numtemp = numtemp + 1;
                }
            }
            cmat[i][j] = numtemp;
        }
    }
//Compute the Normalized Mutual Information
//the numerator part
    double nmi_numerator = 0.0;
    for (int i = 0; i < clu_num1; i++ )
    { 
        for (int j = 0; j < clu_num2; j++)
        {
            if (cmat[i][j] != 0)
            {
                int row = 0;
                int column = 0;
                for (int k = 0; k < clu_num2; k++ )
                {
                    row = row + cmat[i][k];
                }
                for (int l = 0; l < clu_num1; l++ )
                {
                    column = column + cmat[l][j];
                }
                nmi_numerator = nmi_numerator + cmat[i][j] * log10(double(cmat[i][j] * num_nodes)/double(row * column)); 
            }
        }
    }
    nmi_numerator = -2 * nmi_numerator;
    /////////the denominator part
    double nmi_denominator1 = 0;
    double nmi_denominator2 = 0;
    for (int i = 0; i < clu_num1; i++ )
    {
        double row = 0;
        for (int k = 0; k < clu_num2; k++ )
            row = row + cmat[i][k];
        nmi_denominator1 = nmi_denominator1 + row * log10(row / num_nodes);
    }
    for (int j = 0; j < clu_num2; j++ )
    {
        double column = 0;
        for (int l = 0; l < clu_num1; l++ )
            column = column + cmat[l][j];
        nmi_denominator2 = nmi_denominator2 + column * log10(column / num_nodes);
    }
    double nmi_denominator = nmi_denominator1 + nmi_denominator2;
    double nmi = nmi_numerator/nmi_denominator;
    for (int i = 0; i < clu_num1;i++)
        delete [] cmat[i];
    delete [] cmat;
    return nmi;
}

你可能感兴趣的:(学习笔记(4)-社区发现评价指标)