初级
第20章 潜在狄利克雷分配 - 实现LDA模型
未完成
初级参考
完整示例代码供参考,建议自己理解后重新输入
import numpy as np
class LDA:
    """Latent Dirichlet allocation fitted with collapsed Gibbs sampling.

    Documents are sequences of integer word ids in ``[0, vocab_size)``.
    After :meth:`fit`, the following attributes hold the smoothed counts
    of the final sampled state:

    * ``topic_word``   -- shape ``(n_topics, vocab_size)``; word counts per
      topic plus ``beta``.
    * ``doc_topic``    -- shape ``(n_docs, n_topics)``; topic counts per
      document plus ``alpha``.
    * ``topic_counts`` -- shape ``(n_topics,)``; total tokens per topic plus
      ``vocab_size * beta``.
    """

    def __init__(self, n_topics=10, alpha=0.1, beta=0.1, n_iter=1000,
                 random_state=None):
        """Store the hyper-parameters; no work is done until :meth:`fit`.

        Args:
            n_topics: Number of latent topics.
            alpha: Symmetric Dirichlet prior added to document-topic counts.
            beta: Symmetric Dirichlet prior added to topic-word counts.
            n_iter: Number of full Gibbs sweeps over the corpus.
            random_state: ``None`` (default) draws from NumPy's global
                random state, so ``np.random.seed`` keeps working as
                before. An int seed or a ``np.random.RandomState``
                instance makes the fit reproducible on its own.
        """
        self.n_topics = n_topics
        self.alpha = alpha
        self.beta = beta
        self.n_iter = n_iter
        self.random_state = random_state

    def _resolve_rng(self):
        """Return the object providing ``randint`` and ``choice``."""
        if self.random_state is None:
            # The module-level functions share the global RandomState.
            return np.random
        if isinstance(self.random_state, np.random.RandomState):
            return self.random_state
        return np.random.RandomState(self.random_state)

    def fit(self, documents, vocab_size):
        """Run the Gibbs sampler and store the resulting count matrices.

        Args:
            documents: Re-iterable, sized collection of documents, each a
                sized sequence of integer word ids.
            vocab_size: Number of distinct word ids.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            IndexError: If any word id lies outside ``[0, vocab_size)``.
        """
        # Validate up front: a negative id would otherwise be accepted by
        # NumPy's wrap-around indexing and silently credit the wrong word.
        for d, doc in enumerate(documents):
            for w in doc:
                if not 0 <= w < vocab_size:
                    raise IndexError(
                        f"word id {w} in document {d} is out of range "
                        f"for vocab_size {vocab_size}"
                    )

        rng = self._resolve_rng()
        n_docs = len(documents)
        self.vocab_size = vocab_size

        # Counts start at the prior so that every sampling weight below is
        # strictly positive and needs no separate smoothing term.
        self.topic_word = np.zeros((self.n_topics, vocab_size)) + self.beta
        self.doc_topic = np.zeros((n_docs, self.n_topics)) + self.alpha
        self.topic_counts = np.zeros(self.n_topics) + vocab_size * self.beta

        # Random initial topic for every token.
        topic_assignments = []
        for d, doc in enumerate(documents):
            topics = rng.randint(0, self.n_topics, len(doc))
            topic_assignments.append(topics)
            for w, t in zip(doc, topics):
                self.topic_word[t, w] += 1
                self.doc_topic[d, t] += 1
                self.topic_counts[t] += 1

        for _ in range(self.n_iter):
            for d, doc in enumerate(documents):
                doc_topics = topic_assignments[d]
                for i, w in enumerate(doc):
                    # Take the current token out of the counts ...
                    t = doc_topics[i]
                    self.topic_word[t, w] -= 1
                    self.doc_topic[d, t] -= 1
                    self.topic_counts[t] -= 1

                    # ... resample its topic given all the other tokens.
                    # The per-document normaliser is the same for every
                    # topic and cancels in the renormalisation, so it is
                    # not computed.
                    weights = (
                        self.topic_word[:, w] / self.topic_counts
                        * self.doc_topic[d]
                    )
                    t = rng.choice(self.n_topics, p=weights / weights.sum())

                    # ... and put it back under the new topic.
                    doc_topics[i] = t
                    self.topic_word[t, w] += 1
                    self.doc_topic[d, t] += 1
                    self.topic_counts[t] += 1
        return self

    def get_topic_words(self, n_words=10):
        """Return the highest-count word ids of every topic.

        Args:
            n_words: Maximum number of word ids to return per topic.

        Returns:
            A list with one integer array per topic, ordered from the most
            to the least frequent word. ``n_words=0`` yields empty arrays.
        """
        topic_words = []
        for t in range(self.n_topics):
            # Reverse first, then truncate: slicing with ``[-n_words:]``
            # would return the whole vocabulary when n_words is 0.
            ranked = np.argsort(self.topic_word[t])[::-1]
            topic_words.append(ranked[:n_words])
        return topic_words
👑
升级 VIP
解锁全部题目,畅通无阻地学习
- ✓ 解锁全部训练包所有题目
- ✓ 查看完整参考代码和提示
- ✓ 浏览器内直接运行 Python 代码
- ✓ 自动批改 + 进度追踪
30天
¥18
1年
¥99
2年
¥158
3年
¥199