← 返回题库
初级

第18章 概率潜在语义分析 - 习题18.3 - 代码实现

未完成
初级参考 完整示例代码供参考,建议自己理解后重新输入
import numpy as np

def _normalize_rows(values, fallback):
    """Scale each row of ``values`` to sum to 1.

    Rows whose sum is not positive cannot be normalised; they take the
    corresponding row of ``fallback`` instead of producing NaN.
    """
    totals = values.sum(axis=1, keepdims=True)
    has_mass = totals > 0
    safe_totals = np.where(has_mass, totals, 1.0)
    return np.where(has_mass, values / safe_totals, fallback)


def plsa_em(X, n_topics=2, n_iter=100, random_state=None):
    """Fit a PLSA model to a document-word count matrix with EM.

    Args:
        X: 2-D array-like of non-negative counts; ``X[d, w]`` is the number
            of times word ``w`` occurs in document ``d``.
        n_topics: Number of latent topics.
        n_iter: Number of EM iterations to run.
        random_state: Optional seed for the random initialisation. When
            ``None`` the global ``np.random`` state is used.

    Returns:
        A tuple ``(theta, phi)`` where ``theta`` has shape
        ``(n_docs, n_topics)`` with each row summing to 1 (topic distribution
        per document) and ``phi`` has shape ``(n_topics, n_words)`` with each
        row summing to 1 (word distribution per topic).

    Raises:
        ValueError: If ``X`` is not 2-D or contains negative counts.
    """
    counts = np.asarray(X, dtype=float)
    if counts.ndim != 2:
        raise ValueError("X must be a 2-D document-word count matrix")
    if (counts < 0).any():
        raise ValueError("X must contain non-negative counts")
    n_docs, n_words = counts.shape

    # Both the np.random module and a Generator expose .random(size).
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    theta = rng.random((n_docs, n_topics))
    theta = theta / theta.sum(axis=1, keepdims=True)
    phi = rng.random((n_topics, n_words))
    phi = phi / phi.sum(axis=1, keepdims=True)

    for _ in range(n_iter):
        # E-step: posterior[d, w, z] is proportional to theta[d, z] * phi[z, w].
        joint = theta[:, None, :] * phi.T[None, :, :]
        evidence = joint.sum(axis=2, keepdims=True)
        posterior = np.divide(
            joint, evidence, out=np.zeros_like(joint), where=evidence > 0
        )

        # M-step: the posteriors must be weighted by the observed counts;
        # without this weighting the estimates would not depend on X at all.
        weighted = counts[:, :, None] * posterior

        # Empty documents / unused topics have zero mass; keep their previous
        # estimate rather than dividing by zero.
        theta = _normalize_rows(weighted.sum(axis=1), theta)
        phi = _normalize_rows(weighted.sum(axis=0).T, phi)

    return theta, phi

# Toy corpus: one row per document, one column per word, entries are counts.
X = np.array(
    [
        [1, 2, 0],
        [0, 1, 2],
        [2, 0, 1],
    ]
)

# Fit with the default number of topics and iterations, then show the
# per-document topic distribution.
theta, phi = plsa_em(X)
print("文档-主题分布:", theta)
Python 代码 🔒 登录后使用
🔒

登录后即可练习

注册免费账号,在浏览器中直接运行 Python 代码