← 返回题库
初级

第22章 无监督学习方法总结 - 实现轮廓系数

未完成
初级参考 完整示例代码供参考,建议自己理解后重新输入
import numpy as np

def silhouette_score(X, labels):
    """Return the mean silhouette coefficient over all samples.

    For each sample, ``a`` is the mean Euclidean distance to the *other*
    members of its own cluster and ``b`` is the smallest mean distance to the
    members of any other cluster. The sample's coefficient is
    ``(b - a) / max(a, b)``.

    Args:
        X: 2-D array-like with one row per sample.
        labels: 1-D array-like holding one cluster label per row of ``X``.

    Returns:
        The mean coefficient. ``0.0`` is returned when there is only one
        cluster or when every sample is its own cluster, since the
        coefficient is not meaningful in those cases.
    """
    # Accept plain lists as well as arrays; element-wise comparisons below
    # need real ndarrays.
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)

    n_samples = X.shape[0]
    # cluster_of[i] is the index into unique_labels of sample i's cluster.
    unique_labels, cluster_of = np.unique(labels, return_inverse=True)
    n_clusters = len(unique_labels)

    if n_clusters == 1 or n_clusters == n_samples:
        return 0.0

    # Membership masks and sizes do not depend on the sample: build them once.
    masks = [cluster_of == k for k in range(n_clusters)]
    sizes = [np.count_nonzero(mask) for mask in masks]

    # Samples that are skipped below keep the default coefficient of 0.
    silhouette_values = np.zeros(n_samples)

    for i in range(n_samples):
        own = cluster_of[i]

        # A sample alone in its cluster has no intra-cluster distance; by
        # convention its coefficient is 0 (not 1).
        if sizes[own] <= 1:
            continue

        dists = np.linalg.norm(X - X[i], axis=1)

        # The sample's distance to itself is 0, so summing over the whole
        # cluster and dividing by (size - 1) averages over the other members.
        a_i = dists[masks[own]].sum() / (sizes[own] - 1)

        # At least two clusters exist here, so this min() is never empty.
        b_i = min(
            dists[mask].mean() for k, mask in enumerate(masks) if k != own
        )

        denom = max(a_i, b_i)
        # denom == 0 means the sample coincides with every point considered;
        # leave the coefficient at 0 rather than computing 0 / 0.
        if denom > 0:
            silhouette_values[i] = (b_i - a_i) / denom

    return np.mean(silhouette_values)

def _per_sample_silhouette(X, labels):
    """Return one silhouette coefficient per row of ``X``.

    Degenerate samples (a single cluster overall, a sample alone in its
    cluster, or a zero denominator) get a coefficient of 0.
    """
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)

    # cluster_of[i] is the index into unique_labels of sample i's cluster.
    unique_labels, cluster_of = np.unique(labels, return_inverse=True)
    n_clusters = len(unique_labels)
    values = np.zeros(X.shape[0])

    # With fewer than two clusters there is no "nearest other cluster".
    if n_clusters < 2:
        return values

    masks = [cluster_of == k for k in range(n_clusters)]
    sizes = [np.count_nonzero(mask) for mask in masks]

    for i, own in enumerate(cluster_of):
        if sizes[own] <= 1:
            continue

        dists = np.linalg.norm(X - X[i], axis=1)
        # Self-distance is 0, so dividing the cluster sum by (size - 1)
        # averages over the other members only.
        a_i = dists[masks[own]].sum() / (sizes[own] - 1)
        b_i = min(
            dists[mask].mean() for k, mask in enumerate(masks) if k != own
        )

        denom = max(a_i, b_i)
        if denom > 0:
            values[i] = (b_i - a_i) / denom

    return values


def silhouette_plot(X, labels):
    """Draw a bar chart of the sorted per-sample silhouette coefficients.

    A dashed red line marks the mean coefficient. Every sample contributes a
    bar; degenerate samples (see ``_per_sample_silhouette``) contribute 0
    instead of being dropped, so the returned mean covers all samples.

    Args:
        X: 2-D array-like with one row per sample.
        labels: 1-D array-like holding one cluster label per row of ``X``.

    Returns:
        The mean silhouette coefficient over all samples (0.0 for empty
        input).
    """
    # Imported lazily so the module can be imported without matplotlib.
    import matplotlib.pyplot as plt

    scores = _per_sample_silhouette(X, labels)
    # np.mean of an empty array is NaN; report 0.0 for empty input instead.
    mean_score = np.mean(scores) if scores.size else 0.0

    plt.figure(figsize=(8, 6))
    plt.bar(range(len(scores)), np.sort(scores))
    plt.xlabel('Sample Index')
    plt.ylabel('Silhouette Coefficient')
    plt.title('Silhouette Plot')
    plt.axhline(y=mean_score, color='r', linestyle='--', label=f'Mean: {mean_score:.3f}')
    plt.legend()
    plt.show()

    return mean_score
Python 代码 🔒 登录后使用
🔒

登录后即可练习

注册免费账号,在浏览器中直接运行 Python 代码