← 返回题库
初级

第20章 潜在狄利克雷分配 - 吉布斯采样实现

未完成
初级参考:完整示例代码供参考,建议自己理解后重新输入。
import numpy as np

def gibbs_sampling_lda(documents, n_topics, vocab_size, alpha=0.1, beta=0.1, n_iter=100):
    """Run collapsed Gibbs sampling for Latent Dirichlet Allocation.

    Every token is first assigned a uniformly random topic. Each sweep then
    visits every token, removes it from the count tables, resamples its topic
    from the conditional

        p(z = k | rest)  ∝  (n_kw + beta) / (n_k + V * beta) * (n_dk + alpha)

    and adds it back under the new topic.

    Args:
        documents: Sequence of documents; each document is a sequence of
            integer word ids in ``[0, vocab_size)``.
        n_topics: Number of topics.
        vocab_size: Vocabulary size ``V``.
        alpha: Symmetric Dirichlet prior on the per-document topic mixture.
        beta: Symmetric Dirichlet prior on the per-topic word distribution.
        n_iter: Number of full sweeps over the corpus.

    Returns:
        A list of snapshots, one taken after every sweep whose index is a
        multiple of 10 (0, 10, 20, ...). Each snapshot is a dict with
        ``'topic_word'`` (shape ``(n_topics, vocab_size)``) and
        ``'doc_topic'`` (shape ``(n_docs, n_topics)``), both float arrays
        holding the raw assignment counts without the priors.

    Raises:
        IndexError: If a document contains a word id outside
            ``[0, vocab_size)``.
    """
    n_docs = len(documents)

    # Reject bad ids up front: a negative id would otherwise wrap around via
    # NumPy negative indexing and be silently counted as another word.
    for d, doc in enumerate(documents):
        if len(doc) and (min(doc) < 0 or max(doc) >= vocab_size):
            raise IndexError(
                f"document {d} contains a word id outside [0, {vocab_size})"
            )

    # Keep exact integer counts and add the priors only when evaluating the
    # conditional. Folding the priors into float tables and subtracting them
    # again on output leaves rounding residue in the returned counts.
    topic_word = np.zeros((n_topics, vocab_size), dtype=np.int64)
    doc_topic = np.zeros((n_docs, n_topics), dtype=np.int64)
    topic_counts = np.zeros(n_topics, dtype=np.int64)
    beta_total = vocab_size * beta

    # Random initial topic for every token.
    topic_assignments = []
    for d, doc in enumerate(documents):
        topics = np.random.randint(0, n_topics, len(doc))
        topic_assignments.append(topics)
        for w, t in zip(doc, topics):
            topic_word[t, w] += 1
            doc_topic[d, t] += 1
            topic_counts[t] += 1

    samples = []
    for iteration in range(n_iter):
        for d, doc in enumerate(documents):
            assignments = topic_assignments[d]
            for i, w in enumerate(doc):
                # Take the current token out of the counts.
                t = assignments[i]
                topic_word[t, w] -= 1
                doc_topic[d, t] -= 1
                topic_counts[t] -= 1

                # The per-document denominator is the same for every topic,
                # so it cancels in the normalisation and is omitted.
                p_z = (topic_word[:, w] + beta) / (topic_counts + beta_total)
                p_z = p_z * (doc_topic[d] + alpha)
                p_z = p_z / p_z.sum()

                # Resample and put the token back under its new topic.
                t = np.random.choice(n_topics, p=p_z)
                assignments[i] = t
                topic_word[t, w] += 1
                doc_topic[d, t] += 1
                topic_counts[t] += 1

        if iteration % 10 == 0:
            # astype() copies, so later sweeps cannot mutate the snapshot.
            samples.append({
                'topic_word': topic_word.astype(float),
                'doc_topic': doc_topic.astype(float)
            })

    return samples
Python 代码 🔒 登录后使用
🔒

登录后即可练习

注册免费账号,在浏览器中直接运行 Python 代码。