ICode9

精准搜索请尝试: 精确搜索
首页 > 编程语言> 文章详细

统计学习方法第十四章作业:聚类—层次聚类聚合/分裂算法、K_means聚类算法 代码实现

2020-12-08 21:33:39  阅读:220  来源: 互联网

标签:means self list 算法 result way 聚类 append dis


层次聚类聚合/分裂算法

import numpy as np
import copy
import matplotlib.pyplot as plt

class Hierarchical_cluster:
    """Hierarchical clustering in agglomerative ('agg') or divisive ('div') mode.

    Parameters
    ----------
    k : int or None
        Number of clusters requested from ``predict``.
    p : int or float
        Order of the Minkowski distance used between samples.
    dis_way : str
        Linkage used when merging clusters in 'agg' mode: 'min' (single
        linkage), 'max' (complete linkage) or 'mean' (distance between the
        cluster centroids).
    c_way : str
        'agg' to merge bottom-up, 'div' to split top-down.

    After ``fit`` the attribute ``result`` holds the list of partitions that
    were produced, one entry per level; every partition is a list of clusters
    and every cluster is a list of sample indices.
    """

    def __init__(self,k=None,p=2,dis_way='min',c_way='agg'):
        self.k = k
        self.p = p
        self.dis_way = dis_way
        self.c_way = c_way

    def cauclate_dis(self, x1, x2):
        """Return the Minkowski distance of order ``self.p`` between x1 and x2."""
        return np.sum(abs(x1 - x2) ** self.p) ** (1 / self.p)

    def create_D_matrix(self):
        """Fill ``self.D`` with the symmetric matrix of pairwise sample distances."""
        self.D = np.zeros((self.n, self.n))
        for i in range(self.n):
            for j in range(i):
                # Compute each pair once and mirror it; the diagonal stays 0.
                dis = self.cauclate_dis(self.x[i], self.x[j])
                self.D[i][j] = dis
                self.D[j][i] = dis

    def cauclate_cluster_dis(self,c1,c2):
        """Return the distance between two clusters given as lists of indices.

        Raises
        ------
        ValueError
            If ``self.dis_way`` is not 'min', 'max' or 'mean'.
        """
        if self.dis_way == 'min':
            return min((self.D[i][j] for i in c1 for j in c2),
                       default=float('inf'))
        if self.dis_way == 'max':
            return max((self.D[i][j] for i in c1 for j in c2),
                       default=float('inf'))
        if self.dis_way == 'mean':
            mean1 = np.mean([self.x[i] for i in c1], axis=0)
            mean2 = np.mean([self.x[i] for i in c2], axis=0)
            return self.cauclate_dis(mean1, mean2)
        raise ValueError(
            "dis_way must be 'min', 'max' or 'mean', got %r" % (self.dis_way,))

    def split_C(self,C):
        """Split cluster ``C`` in two around its two most distant samples.

        The farthest pair seeds the two halves; every other sample joins the
        seed it is closer to (ties go to the first seed's side being farther,
        i.e. the sample is placed with the second seed).

        Raises
        ------
        ValueError
            If ``C`` has fewer than two samples.
        """
        if len(C) < 2:
            raise ValueError("cannot split a cluster with fewer than 2 samples")
        max_ = 0
        max_i = max_j = None
        for i in C:
            for j in C:
                # Strictly j > i: a sample is never paired with itself, so the
                # two seeds are always distinct even when all distances are 0.
                if j > i and self.D[i][j] >= max_:
                    max_ = self.D[i][j]
                    max_i = i
                    max_j = j
        C1 = [max_i]
        C2 = [max_j]
        for c in C:
            if c == max_i or c == max_j:
                continue
            if self.D[max_i][c] >= self.D[max_j][c]:
                C2.append(c)
            else:
                C1.append(c)
        return C1, C2

    def _fit_agg(self):
        """Return the bottom-up hierarchy: n clusters, n-1, ..., down to 1."""
        levels = [[[i] for i in range(self.n)]]
        while len(levels[-1]) > 1:
            current = levels[-1]
            best_dis = None
            best_pair = None
            for a in range(len(current)):
                for b in range(a):
                    dis = self.cauclate_cluster_dis(current[a], current[b])
                    # Strict '<' keeps the first closest pair on ties.
                    if best_dis is None or dis < best_dis:
                        best_dis = dis
                        best_pair = (a, b)
            a, b = best_pair
            merged = current[a] + current[b]
            # Untouched clusters keep their order; the merged one goes last.
            nxt = [list(c) for idx, c in enumerate(current) if idx not in (a, b)]
            nxt.append(merged)
            levels.append(nxt)
        return levels

    def _fit_div(self):
        """Return the top-down hierarchy; each level splits every non-singleton."""
        levels = [[list(range(self.n))]]
        while len(levels[-1]) < self.n:
            nxt = []
            for cluster in levels[-1]:
                if len(cluster) == 1:
                    nxt.append(list(cluster))
                else:
                    C1, C2 = self.split_C(cluster)
                    nxt.append(C1)
                    nxt.append(C2)
            levels.append(nxt)
        return levels

    def fit(self,x):
        """Build the cluster hierarchy for samples ``x`` and store it in ``result``.

        Raises
        ------
        ValueError
            If ``self.c_way`` is neither 'agg' nor 'div'.
        """
        if self.c_way not in ('agg', 'div'):
            raise ValueError(
                "c_way must be 'agg' or 'div', got %r" % (self.c_way,))
        self.x = np.array(x)
        self.n = len(x)
        self.create_D_matrix()
        if self.c_way == 'agg':
            self.result = self._fit_agg()
        else:
            self.result = self._fit_div()

    def predict(self):
        """Return one cluster label (as a float) per fitted sample.

        In 'agg' mode the partition with exactly ``k`` clusters is used. In
        'div' mode every level splits all non-singleton clusters at once, so
        not every cluster count exists; the first level with at least ``k``
        clusters is used.

        Raises
        ------
        ValueError
            If ``k`` is None or outside ``1..n``, or ``c_way`` is unknown.
        """
        n = len(self.x)
        if self.k is None or not 1 <= self.k <= n:
            raise ValueError(
                "k must be between 1 and the number of samples (%d), got %r"
                % (n, self.k))
        if self.c_way == 'agg':
            # result[-1] has 1 cluster, result[-2] has 2, and so on.
            result = self.result[-self.k]
        elif self.c_way == 'div':
            result = next(level for level in self.result
                          if len(level) >= self.k)
        else:
            raise ValueError(
                "c_way must be 'agg' or 'div', got %r" % (self.c_way,))
        print(result)
        final = np.zeros(n)
        for label, cluster in enumerate(result):
            for j in cluster:
                final[j] = label
        return list(final)

def main():
    """Cluster a small 2-D sample set hierarchically and plot each cluster."""
    x = [[0,2],[0,0],[1,0],[5,0],[5,2],[1,5],
         [2,4],[3,5],[1,1],[1,4],[5,4],[5,3],[0,1],
         [-1,0],[1,1],[4,4],[3,4]]
    k = 3
    HC = Hierarchical_cluster(k,dis_way='mean',c_way='agg')
    HC.fit(x)
    result = HC.predict()
    print(result)

    points = np.array(x)
    labels = np.array(result)
    color_list = ['r','k','b','g']
    # Iterate over the labels that actually occur rather than over the number
    # of samples; colours wrap around if there are more clusters than colours.
    for label in np.unique(labels):
        members = points[labels == label]
        colour = color_list[int(label) % len(color_list)]
        plt.scatter(members[:, 0], members[:, 1], c=colour)
    plt.show()

# Run the hierarchical-clustering demo only when executed as a script.
if __name__ == '__main__':
    main()

#-----result-----------------
/usr/bin/python3 /Users/zhengyanzhao/PycharmProjects/tongjixuexi/shixian2/hierarchical_clustering.py
[[3], [2, 1, 14, 8, 12, 0, 13], [16, 6, 7, 9, 5, 15, 10, 11, 4]]
[1.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0]

K_means聚类算法

import numpy as np
import matplotlib.pyplot as plt

class K_menas:
    """K-means clustering with a Minkowski distance of order ``p``.

    Parameters
    ----------
    k : int or None
        Number of clusters.
    p : int or float
        Order of the Minkowski distance between a sample and a centroid.
    random_state : int or None
        Seed for choosing the initial centroids. When None, NumPy's global
        random state is used.

    After ``fit``: ``mean_list`` holds the centroids, ``c_list`` the sample
    indices of each cluster and ``x_list`` the samples of each cluster.
    """

    def __init__(self,k=None,p=2,random_state=None):
        self.k = k
        self.p = p
        self.random_state = random_state

    def cauclate_dis(self, x1, x2):
        """Return the Minkowski distance of order ``self.p`` between x1 and x2."""
        return np.sum(abs(x1 - x2) ** self.p) ** (1 / self.p)

    def fit(self,x):
        """Assign the samples ``x`` to ``k`` clusters until the centroids stop moving.

        Raises
        ------
        ValueError
            If ``k`` is None or outside ``1..len(x)``.
        """
        self.x = np.array(x)
        self.n = len(x)
        if self.k is None or not 1 <= self.k <= self.n:
            raise ValueError(
                "k must be between 1 and the number of samples (%d), got %r"
                % (self.n, self.k))

        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        initial_c = rng.choice(self.n, self.k, replace=False)
        self.mean_list = np.array([self.x[i] for i in initial_c], dtype=float)

        while True:
            self.c_list = [[] for _ in range(self.k)]
            self.x_list = [[] for _ in range(self.k)]
            for i in range(self.n):
                dists = [self.cauclate_dis(self.x[i], m) for m in self.mean_list]
                # argmin returns the first closest centroid on ties.
                c = int(np.argmin(dists))
                self.c_list[c].append(i)
                self.x_list[c].append(self.x[i])
            # A cluster can end up empty (e.g. duplicate points picked as
            # seeds); keep its previous centroid instead of averaging nothing.
            mean_list_ = np.array([
                np.mean(pts, axis=0) if pts else self.mean_list[j]
                for j, pts in enumerate(self.x_list)
            ], dtype=float)
            if np.array_equal(mean_list_, self.mean_list):
                break
            self.mean_list = mean_list_

    def predict(self):
        """Return the cluster index of every fitted sample, in sample order."""
        labels = [None] * self.n
        for label, members in enumerate(self.c_list):
            for j in members:
                labels[j] = label
        return labels

    def compute_d(self,c):
        """Return the diameter of ``c``: the largest distance between two of its points.

        Returns 0.0 when ``c`` has fewer than two points.
        """
        max_ = 0.0
        for c1 in range(len(c)):
            for c2 in range(c1):
                max_ = max(max_, self.cauclate_dis(c[c1], c[c2]))
        return max_

    def compute_mean_d(self):
        """Return the mean diameter over the non-empty clusters (0.0 if none)."""
        diameters = [self.compute_d(pts) for pts in self.x_list if len(pts)]
        if not diameters:
            return 0.0
        return np.mean(diameters)

def main():
    """Run k-means on a small 2-D sample set, compare several k, and plot."""
    x = [[0,2],[0,0],[1,0],[5,0],[5,2],[1,5],
         [2,4],[3,5],[1,1],[1,4],[5,4],[5,3],[0,1],
         [-1,0],[1,1],[4,4],[3,4]]
    k_ = 5
    KM = K_menas(k_)
    KM.fit(x)
    result = KM.predict()
    print(result)

    # Mean cluster diameter for a range of k, to help pick a cluster count.
    k_list=[]
    for k in [1,2,3,4,5,6,7]:
        KM = K_menas(k)
        KM.fit(x)
        k_list.append([k,KM.compute_mean_d()])
    print(k_list)

    points = np.array(x)
    labels = np.array(result)
    # One colour per possible cluster of the k_ = 5 run; colours wrap around
    # if there are ever more clusters than entries.
    color_list = ['r','k','b','g','m','c','y']
    for label in np.unique(labels):
        members = points[labels == label]
        colour = color_list[int(label) % len(color_list)]
        plt.scatter(members[:, 0], members[:, 1], c=colour)
    plt.show()

# Run the k-means demo only when executed as a script.
if __name__ == '__main__':
    main()

#----------result----------------
/usr/bin/python3 /Users/zhengyanzhao/PycharmProjects/tongjixuexi/shixian2/K_means.py
[3, 3, 3, 4, 2, 0, 0, 1, 3, 0, 2, 2, 3, 3, 3, 1, 1]
[[1, 7.211102550927978], [2, 4.319596107466319], [3, 3.7863912010391503], [4, 1.9715873793431686], [5, 1.612899020449196], [6, 1.0107491837076632], [7, 1.0337239967856842]]

标签:means,self,list,算法,result,way,聚类,append,dis
来源: https://blog.csdn.net/weixin_45839693/article/details/110888036

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是一个为广大技术爱好者提供技术内容和技术思想分享与交流的网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有