初级
第20章 潜在狄利克雷分配 - 吉布斯采样实现
未完成
初级参考
完整示例代码供参考，建议自己理解后重新输入
import numpy as np
def gibbs_sampling_lda(documents, n_topics, vocab_size, alpha=0.1, beta=0.1,
                       n_iter=100, random_state=None):
    """Run collapsed Gibbs sampling for LDA and return periodic count snapshots.

    Args:
        documents: Sequence of documents; each document is a sized sequence
            of integer word ids in ``[0, vocab_size)``.
        n_topics: Number of topics.
        vocab_size: Number of distinct word ids.
        alpha: Smoothing added to every document-topic count.
        beta: Smoothing added to every topic-word count.
        n_iter: Number of full sweeps over all tokens.
        random_state: ``None`` (default) draws from the global ``np.random``
            stream, so ``np.random.seed`` keeps working as before. An int
            seed or an ``np.random.RandomState`` instance makes the run
            reproducible without touching global state.

    Returns:
        A list of dicts, one per sweep whose index is a multiple of 10
        (0, 10, 20, ...). Each dict holds ``'topic_word'`` with shape
        ``(n_topics, vocab_size)`` and ``'doc_topic'`` with shape
        ``(n_docs, n_topics)``: copies of the count matrices with the
        smoothing terms subtracted again.

    Raises:
        IndexError: If any word id lies outside ``[0, vocab_size)``.
    """
    if random_state is None:
        randint, choice = np.random.randint, np.random.choice
    else:
        if isinstance(random_state, np.random.RandomState):
            rng = random_state
        else:
            rng = np.random.RandomState(random_state)
        randint, choice = rng.randint, rng.choice

    n_docs = len(documents)
    # The count matrices carry the smoothing terms from the start, so the
    # sampling weights below need no extra "+ alpha" / "+ beta".
    topic_word = np.zeros((n_topics, vocab_size)) + beta
    doc_topic = np.zeros((n_docs, n_topics)) + alpha
    topic_counts = np.zeros(n_topics) + vocab_size * beta

    # Random initial topic for every token.
    topic_assignments = []
    for d, doc in enumerate(documents):
        for w in doc:
            # A negative id would silently wrap around under NumPy indexing
            # and corrupt the counts, so reject it like an id that is too big.
            if not 0 <= w < vocab_size:
                raise IndexError(
                    f"word id {w} in document {d} is outside "
                    f"[0, {vocab_size})"
                )
        topics = randint(0, n_topics, len(doc))
        topic_assignments.append(topics)
        for w, t in zip(doc, topics):
            topic_word[t, w] += 1
            doc_topic[d, t] += 1
            topic_counts[t] += 1

    samples = []
    for iteration in range(n_iter):
        for d, doc in enumerate(documents):
            assignments = topic_assignments[d]
            for i, w in enumerate(doc):
                # Take the token out of the counts before resampling it.
                t = assignments[i]
                topic_word[t, w] -= 1
                doc_topic[d, t] -= 1
                topic_counts[t] -= 1

                # Dividing doc_topic[d] by its sum would scale every topic
                # by the same constant, which the normalisation below
                # cancels, so that division is omitted.
                weights = (topic_word[:, w] / topic_counts) * doc_topic[d]
                p_z = weights / weights.sum()

                t = choice(n_topics, p=p_z)
                assignments[i] = t
                topic_word[t, w] += 1
                doc_topic[d, t] += 1
                topic_counts[t] += 1

        # Snapshot after every tenth sweep, starting with the first one.
        if iteration % 10 == 0:
            samples.append({
                'topic_word': topic_word.copy() - beta,
                'doc_topic': doc_topic.copy() - alpha
            })
    return samples
👑
升级 VIP
解锁全部题目，畅通无阻地学习
- ✓ 解锁全部训练包所有题目
- ✓ 查看完整参考代码和提示
- ✓ 浏览器内直接运行 Python 代码
- ✓ 自动批改 + 进度追踪
30天
¥18
1年
¥99
2年
¥158
3年
¥199