初级
第05章 决策树 - 习题5.2 - 实现LeastSqRTree类
未完成
初级参考
完整示例代码供参考,建议自己理解后重新输入
import numpy as np
class LeastSqRTree:
    """Least-squares regression tree grown by recursive binary splitting.

    Every internal node is a dict with the keys ``"feature"`` (column index
    of the splitting variable), ``"value"`` (split threshold; samples with
    ``x[feature] <= value`` go left, the rest go right), ``"left"`` and
    ``"right"``.  A child is either another node dict or a leaf, stored as
    the mean target value of the samples in that region.

    Args:
        train_X: 2-D array-like of shape (n_samples, n_features).
        y: 1-D array-like with one regression target per sample.
        epsilon: Loss threshold; a node whose best split already has a
            squared error below this value is not split any further.
    """

    def __init__(self, train_X, y, epsilon):
        # Training feature matrix, one row per sample.
        self.x = np.asarray(train_X)
        # Regression targets (one value per training sample).
        self.y = np.asarray(y)
        # Number of features, i.e. columns of the feature matrix.
        self.feature_count = self.x.shape[1]
        # Loss threshold that stops further splitting.
        self.epsilon = epsilon
        # The fitted regression tree; stays None until fit() is called.
        self.tree = None

    def _fit(self, x, y, feature_count, epsilon):
        """Recursively build the subtree for the samples ``(x, y)``.

        Args:
            x: Feature matrix of the samples that reached this node.
            y: Targets of those samples.
            feature_count: Number of feature columns to consider.
            epsilon: Loss threshold below which children become leaves.

        Returns:
            The node dict describing the best split of ``(x, y)``.
        """
        # Pick the best splitting variable j and split point (row index) s.
        j, s, minval, c1, c2 = self._divide(x, y, feature_count)
        threshold = x[s, j]
        # Compute both region masks once instead of on every use.
        left_mask = x[:, j] <= threshold
        right_mask = x[:, j] > threshold

        tree = {"feature": j, "value": threshold, "left": None, "right": None}

        if not right_mask.any():
            # Degenerate split: no sample lies above the threshold, so the
            # left region is the whole node.  Recursing would repeat the same
            # data forever; make both children leaves with the node mean.
            tree["left"] = c1
            tree["right"] = c1
            return tree

        good_enough = minval < epsilon

        if good_enough or np.count_nonzero(left_mask) <= 1:
            tree["left"] = c1
        else:
            tree["left"] = self._fit(
                x[left_mask], y[left_mask], feature_count, epsilon
            )

        # The right region is measured against the split VALUE x[s, j],
        # never against the row index s.
        if good_enough or np.count_nonzero(right_mask) <= 1:
            tree["right"] = c2
        else:
            tree["right"] = self._fit(
                x[right_mask], y[right_mask], feature_count, epsilon
            )
        return tree

    def fit(self):
        """Grow the tree on the training data and store it in ``self.tree``."""
        self.tree = self._fit(self.x, self.y, self.feature_count, self.epsilon)

    @staticmethod
    def _sse(values):
        """Return the sum of squared deviations of ``values`` from their mean.

        An empty region contributes 0.0.  The residuals are computed in a new
        floating-point array so integer targets are never truncated.
        """
        if len(values) == 0:
            return 0.0
        residuals = values - np.mean(values)
        return float(np.sum(residuals * residuals))

    @staticmethod
    def _divide(x, y, feature_count):
        """Find the split that minimises the total squared error.

        Every observed feature value is tried as a threshold; the cost of a
        candidate is the squared error of the ``<=`` region plus that of the
        ``>`` region (the least-squares criterion labelled formula 5.21 in
        the original comments).

        Args:
            x: Feature matrix, shape (n_samples, n_features).
            y: Targets, one per row of ``x``.
            feature_count: Number of feature columns to scan.

        Returns:
            A tuple ``(j, s, minval, c1, c2)``: the feature index, the row
            index whose value ``x[s, j]`` is the threshold, the minimal cost
            as a scalar, and the mean targets of the left and right regions.
            If the right region is empty, ``c2`` falls back to ``c1``.

        Raises:
            ValueError: If ``x`` contains no samples.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        sample_count = len(x)
        if sample_count == 0:
            raise ValueError("cannot split an empty training set")

        # cost[i, k]: loss of splitting feature i at the value found in row k.
        cost = np.zeros((feature_count, sample_count))
        for i in range(feature_count):
            column = x[:, i]
            for k, value in enumerate(column):
                below = y[column <= value]
                above = y[column > value]
                cost[i, k] = LeastSqRTree._sse(below) + LeastSqRTree._sse(above)

        # First minimum in row-major order; ties therefore resolve to the
        # lowest feature index, then the lowest row index.
        j, s = np.unravel_index(np.argmin(cost), cost.shape)

        # Mean target of each region for the chosen split.
        threshold = x[s, j]
        c1 = np.mean(y[x[:, j] <= threshold])
        right_targets = y[x[:, j] > threshold]
        # Avoid the NaN that the mean of an empty region would produce.
        c2 = np.mean(right_targets) if len(right_targets) else c1
        return j, s, cost[j, s], c1, c2
👑
升级 VIP
解锁全部题目,畅通无阻地学习
- ✓ 解锁全部训练包所有题目
- ✓ 查看完整参考代码和提示
- ✓ 浏览器内直接运行 Python 代码
- ✓ 自动批改 + 进度追踪
30天
¥18
1年
¥99
2年
¥158
3年
¥199