← 返回题库
初级

第20章 潜在狄利克雷分配 - 实现LDA模型

未完成
初级参考 完整示例代码供参考,建议自己理解后重新输入
import numpy as np

class LDA:
    """Latent Dirichlet Allocation fitted with collapsed Gibbs sampling.

    After ``fit`` the instance exposes:

    * ``topic_word``   -- array of shape (n_topics, vocab_size); per-topic
      word counts with the ``beta`` prior already added to every cell.
    * ``doc_topic``    -- array of shape (n_docs, n_topics); per-document
      topic counts with the ``alpha`` prior already added to every cell.
    * ``topic_counts`` -- array of shape (n_topics,); total tokens per topic
      plus ``vocab_size * beta``.
    """

    def __init__(self, n_topics: int = 10, alpha: float = 0.1,
                 beta: float = 0.1, n_iter: int = 1000, random_state=None):
        """Store the hyper-parameters; no computation happens here.

        Args:
            n_topics: Number of topics.
            alpha: Value added to every document-topic count.
            beta: Value added to every topic-word count.
            n_iter: Number of full Gibbs sweeps over all tokens.
            random_state: Optional seed for a private
                ``np.random.RandomState``. When ``None`` (the default) the
                global ``np.random`` generator is used, so code that calls
                ``np.random.seed(...)`` beforehand behaves as it always did.
        """
        self.n_topics = n_topics
        self.alpha = alpha
        self.beta = beta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, documents, vocab_size: int) -> "LDA":
        """Run the Gibbs sampler over ``documents`` and return ``self``.

        Args:
            documents: Sequence of documents, each a sequence of integer
                word ids. The ids are used directly as column indices into
                ``topic_word``, so they are assumed to lie in
                ``[0, vocab_size)`` -- this is not validated here.
            vocab_size: Number of distinct word ids.

        Returns:
            The fitted instance.
        """
        # Use a private generator only when a seed was requested; otherwise
        # keep drawing from NumPy's global state (see __init__).
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        n_docs = len(documents)
        self.vocab_size = vocab_size

        # The count tables start at the prior values, so the sampling
        # formula below needs no separate "+ alpha" / "+ beta" terms.
        self.topic_word = np.zeros((self.n_topics, vocab_size)) + self.beta
        self.doc_topic = np.zeros((n_docs, self.n_topics)) + self.alpha
        self.topic_counts = np.zeros(self.n_topics) + vocab_size * self.beta

        # Random initial topic for every token, recorded in the tables.
        topic_assignments = []
        for d, doc in enumerate(documents):
            topics = rng.randint(0, self.n_topics, len(doc))
            topic_assignments.append(topics)
            for w, t in zip(doc, topics):
                self.topic_word[t, w] += 1
                self.doc_topic[d, t] += 1
                self.topic_counts[t] += 1

        for _ in range(self.n_iter):
            for d, doc in enumerate(documents):
                for i, w in enumerate(doc):
                    # Take the current token out of the counts so that it
                    # does not influence its own resampling.
                    t = topic_assignments[d][i]
                    self.topic_word[t, w] -= 1
                    self.doc_topic[d, t] -= 1
                    self.topic_counts[t] -= 1

                    # Unnormalised weight per topic: (share of word w in the
                    # topic) * (share of the topic in document d).
                    probs = (self.topic_word[:, w] / self.topic_counts) * (self.doc_topic[d] / self.doc_topic[d].sum())
                    t = rng.choice(self.n_topics, p=probs / probs.sum())

                    # Put the token back under its newly drawn topic.
                    topic_assignments[d][i] = t
                    self.topic_word[t, w] += 1
                    self.doc_topic[d, t] += 1
                    self.topic_counts[t] += 1

        return self

    def get_topic_words(self, n_words: int = 10) -> list:
        """Return, for each topic, the ids of its highest-count words.

        Args:
            n_words: Maximum number of word ids per topic. Values ``<= 0``
                yield empty arrays; values above the vocabulary size yield
                the whole vocabulary.

        Returns:
            A list with one integer array per topic, ordered from the
            highest count to the lowest.
        """
        # Clamp so that 0 means "no words"; a plain ``[-0:]`` slice would
        # return the entire vocabulary instead.
        limit = max(n_words, 0)
        topic_words = []
        for t in range(self.n_topics):
            # Subtracting beta removes the prior; it is a constant shift.
            ascending = np.argsort(self.topic_word[t] - self.beta)
            topic_words.append(ascending[::-1][:limit])
        return topic_words
Python 代码 🔒 登录后使用
🔒

登录后即可练习

注册免费账号,在浏览器中直接运行 Python 代码