← 返回题库
初级

第22章 无监督学习方法总结 - 实现DBSCAN聚类

未完成
初级参考 完整示例代码供参考,建议自己理解后重新输入
import numpy as np

class DBSCAN:
    """Density-based clustering (DBSCAN) using Euclidean distance.

    A sample is a *core point* when at least ``min_samples`` samples
    (itself included) lie within distance ``eps`` of it. Clusters are grown
    by repeatedly absorbing the neighborhoods of core points; samples that
    are not reachable from any core point keep the label ``-1`` (noise).

    Attributes:
        eps: Neighborhood radius (inclusive).
        min_samples: Minimum neighborhood size, counting the sample itself,
            for a sample to be a core point.
        labels: ``None`` before ``fit``; afterwards an integer array with one
            cluster id per sample (``0, 1, ...``) or ``-1`` for noise.
    """

    def __init__(self, eps=0.5, min_samples=5):
        self.eps = eps
        self.min_samples = min_samples
        self.labels = None

    def fit(self, X):
        """Cluster the rows of ``X`` and store the result in ``self.labels``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Returns:
            ``self``, so calls can be chained.
        """
        X = np.asarray(X)
        n_samples = X.shape[0]
        # -1 doubles as "not yet assigned" and "noise": a sample that is
        # never reached from a core point simply keeps this label.
        self.labels = np.full(n_samples, -1)

        cluster_id = 0

        for i in range(n_samples):
            if self.labels[i] != -1:
                continue

            neighbors = self._get_neighbors(X, i)

            # Not a core point: leave it as noise for now. It may still be
            # claimed later as a border point of some cluster.
            if len(neighbors) < self.min_samples:
                continue

            self.labels[i] = cluster_id
            seed_set = [n for n in neighbors.tolist() if n != i]
            # Companion set gives O(1) "already queued?" checks instead of
            # scanning the seed list for every candidate neighbor.
            queued = set(seed_set)
            queued.add(i)

            j = 0
            while j < len(seed_set):
                q = seed_set[j]
                j += 1

                # Already claimed (by this or an earlier cluster): do not
                # relabel and do not expand through it again.
                if self.labels[q] != -1:
                    continue

                self.labels[q] = cluster_id
                q_neighbors = self._get_neighbors(X, q)

                # Only core points propagate the cluster; border points are
                # labelled but their neighborhoods are not followed.
                if len(q_neighbors) >= self.min_samples:
                    for n in q_neighbors.tolist():
                        if n not in queued:
                            queued.add(n)
                            seed_set.append(n)

            cluster_id += 1

        return self

    def _get_neighbors(self, X, i):
        """Return indices of all rows of ``X`` within ``eps`` of row ``i``.

        The result includes ``i`` itself whenever ``eps`` is non-negative.
        """
        distances = np.linalg.norm(X - X[i], axis=1)
        return np.where(distances <= self.eps)[0]

    def fit_predict(self, X):
        """Run ``fit`` on ``X`` and return the resulting label array."""
        self.fit(X)
        return self.labels
Python 代码 🔒 登录后使用
🔒

登录后即可练习

注册免费账号,在浏览器中直接运行 Python 代码