← 返回题库
初级

第13章 无监督学习概论 - 实现层次聚类

未完成
初级参考：完整示例代码供参考，建议自己理解后重新输入
import numpy as np
from scipy.cluster.hierarchy import dendrogram, linkage
import matplotlib.pyplot as plt

class AgglomerativeClustering:
    """Bottom-up hierarchical clustering using complete linkage.

    Every sample starts as its own cluster. The two clusters whose
    farthest members are closest (complete linkage, Euclidean distance)
    are merged repeatedly until ``n_clusters`` clusters remain.

    Attributes:
        n_clusters: Number of clusters to stop at.
        labels: Integer array with one cluster index per sample, set by
            ``fit``; ``None`` before fitting.
    """

    def __init__(self, n_clusters=2):
        self.n_clusters = n_clusters
        self.labels = None

    def fit(self, X):
        """Cluster the samples in ``X`` and store the result in ``labels``.

        Args:
            X: Array-like whose first axis indexes samples. Remaining axes
                are flattened into one feature vector per sample.

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: If ``n_clusters`` is smaller than 1.
        """
        if self.n_clusters < 1:
            raise ValueError(
                f"n_clusters must be at least 1, got {self.n_clusters!r}"
            )

        # Convert to float so that list inputs work and unsigned integer
        # data cannot wrap around when two samples are subtracted.
        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        X = X.reshape(n_samples, int(np.prod(X.shape[1:])))

        # Symmetric matrix of pairwise Euclidean distances, one row at a time.
        distances = np.zeros((n_samples, n_samples))
        for i in range(n_samples - 1):
            row = np.linalg.norm(X[i + 1:] - X[i], axis=1)
            distances[i, i + 1:] = row
            distances[i + 1:, i] = row

        np.fill_diagonal(distances, np.inf)

        clusters = [[i] for i in range(n_samples)]

        while len(clusters) > self.n_clusters:
            min_dist = np.inf
            # At least two clusters exist here because n_clusters >= 1.
            # Defaulting to the first pair guarantees that two *different*
            # clusters are merged even when no pair has a finite distance.
            merge_i, merge_j = 0, 1

            for i in range(len(clusters)):
                for j in range(i + 1, len(clusters)):
                    cluster_dist = self._cluster_distance(
                        X, clusters[i], clusters[j], distances
                    )
                    # Strict comparison keeps the first pair on ties.
                    if cluster_dist < min_dist:
                        min_dist = cluster_dist
                        merge_i, merge_j = i, j

            # merge_j > merge_i, so popping it leaves merge_i's index valid.
            clusters[merge_i].extend(clusters[merge_j])
            clusters.pop(merge_j)

        self.labels = np.zeros(n_samples, dtype=int)
        for label, cluster in enumerate(clusters):
            self.labels[cluster] = label

        return self

    def _cluster_distance(self, X, cluster1, cluster2, distances):
        """Return the complete-linkage distance between two clusters.

        Args:
            X: Unused; kept so the signature stays compatible.
            cluster1: Sample indices of the first cluster.
            cluster2: Sample indices of the second cluster.
            distances: Square matrix of pairwise sample distances.

        Returns:
            The largest entry of ``distances`` over all pairs taken from
            the two clusters, or 0 if either cluster is empty.
        """
        if len(cluster1) == 0 or len(cluster2) == 0:
            return 0
        return distances[np.ix_(cluster1, cluster2)].max()
Python 代码 🔒 登录后使用
🔒

登录后即可练习

注册免费账号，在浏览器中直接运行 Python 代码