初级
第22章 无监督学习方法总结 - 实现轮廓系数
未完成
初级参考
完整示例代码供参考,建议自己理解后重新输入
import numpy as np
def silhouette_score(X, labels):
    """Return the mean silhouette coefficient over all samples.

    For each sample, ``a`` is the mean Euclidean distance to the *other*
    members of its own cluster and ``b`` is the smallest mean distance to
    the members of any other cluster.  The sample's coefficient is
    ``(b - a) / max(a, b)``.

    Args:
        X: Array-like of shape (n_samples, n_features).
        labels: Array-like of length n_samples giving each sample's cluster.

    Returns:
        The mean coefficient as a float.  Returns 0.0 when there is exactly
        one cluster or when every sample is its own cluster (this also
        covers empty input).
    """
    X = np.asarray(X, dtype=float)
    # Lists would make ``labels == value`` a scalar comparison, not a mask.
    labels = np.asarray(labels)

    n_samples = X.shape[0]
    unique_labels = np.unique(labels)
    n_clusters = len(unique_labels)
    if n_clusters == 1 or n_clusters == n_samples:
        return 0.0

    # Membership masks do not depend on the sample, so build them once.
    cluster_masks = [labels == label for label in unique_labels]

    silhouette_values = np.zeros(n_samples)
    for i in range(n_samples):
        distances = np.linalg.norm(X - X[i], axis=1)
        own_mask = labels == labels[i]
        own_size = np.count_nonzero(own_mask)
        if own_size == 1:
            # A singleton cluster has no intra-cluster distance; by
            # convention its coefficient is 0 (already the array default).
            continue

        # The sample's distance to itself is 0, so summing over the whole
        # cluster and dividing by (size - 1) averages over the others only.
        a_i = distances[own_mask].sum() / (own_size - 1)
        b_i = min(
            distances[mask].mean()
            for label, mask in zip(unique_labels, cluster_masks)
            if label != labels[i]
        )

        denominator = max(a_i, b_i)
        # Coincident points can make both a and b zero; leave the score at 0
        # instead of producing NaN from 0/0.
        if denominator > 0:
            silhouette_values[i] = (b_i - a_i) / denominator

    return float(np.mean(silhouette_values))
def _silhouette_sample_values(X, labels, unique_labels):
    """Return each sample's silhouette coefficient, in input order."""
    cluster_masks = [labels == label for label in unique_labels]
    values = np.zeros(X.shape[0])
    for i in range(X.shape[0]):
        distances = np.linalg.norm(X - X[i], axis=1)
        own_mask = labels == labels[i]
        own_size = np.count_nonzero(own_mask)
        if own_size == 1:
            # Singleton cluster: coefficient is 0 by convention.
            continue
        # Self-distance is 0, so dividing the cluster sum by (size - 1)
        # averages over the other members only.
        a_i = distances[own_mask].sum() / (own_size - 1)
        b_i = min(
            distances[mask].mean()
            for label, mask in zip(unique_labels, cluster_masks)
            if label != labels[i]
        )
        denominator = max(a_i, b_i)
        # Keep 0 rather than NaN when all relevant points coincide.
        if denominator > 0:
            values[i] = (b_i - a_i) / denominator
    return values


def silhouette_plot(X, labels):
    """Draw a bar chart of sorted silhouette coefficients and return their mean.

    Every sample contributes one bar; singleton clusters and samples whose
    ``max(a, b)`` is zero are scored 0.  A dashed red line marks the mean.

    Args:
        X: Array-like of shape (n_samples, n_features).
        labels: Array-like of length n_samples giving each sample's cluster.

    Returns:
        The mean silhouette coefficient as a float.  With fewer than two
        clusters the coefficient is undefined: nothing is plotted and 0.0 is
        returned.
    """
    X = np.asarray(X, dtype=float)
    # Lists would make ``labels == value`` a scalar comparison, not a mask.
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    if len(unique_labels) < 2:
        # No "other" cluster exists, so there is nothing meaningful to draw.
        return 0.0

    scores = _silhouette_sample_values(X, labels, unique_labels)
    mean_score = float(np.mean(scores))

    # Imported only once there is something to draw, so the degenerate case
    # above does not need a plotting backend.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(8, 6))
    plt.bar(range(len(scores)), np.sort(scores))
    plt.xlabel('Sample Index')
    plt.ylabel('Silhouette Coefficient')
    plt.title('Silhouette Plot')
    plt.axhline(y=mean_score, color='r', linestyle='--', label=f'Mean: {mean_score:.3f}')
    plt.legend()
    plt.show()
    return mean_score
👑
升级 VIP
解锁全部题目,畅通无阻地学习
- ✓ 解锁全部训练包所有题目
- ✓ 查看完整参考代码和提示
- ✓ 浏览器内直接运行 Python 代码
- ✓ 自动批改 + 进度追踪
30天
¥18
1年
¥99
2年
¥158
3年
¥199