Commit d32c46d
luokn committed Oct 7, 2022
1 parent: cfa2147
Showing 12 changed files with 59 additions and 79 deletions.
13 changes: 6 additions & 7 deletions src/adaboost.py
@@ -3,12 +3,12 @@
 # @Author: Luokun
 # @Email : olooook@outlook.com
 
-
 import numpy as np
 from matplotlib import pyplot as plt
 
 
 class AdaBoost:
+
     def __init__(self, n_estimators: int, lr=1e-2, eps=1e-5):
         """
         Args:
@@ -52,6 +52,7 @@ def __call__(self, X: np.ndarray) -> np.ndarray:
 
 
 class WeakEstimator:  # weak estimator: a one-level decision tree (decision stump)
+
     def __init__(self, lr: float):
         self.lr, self.feature, self.threshold, self.sign = lr, None, None, None  # split feature, split threshold, sign in {-1, 1}
 
@@ -73,12 +74,10 @@ def __call__(self, X: np.ndarray) -> np.ndarray:
 
 
 def load_data(n_samples_per_class=500):
-    X = np.concatenate(
-        [
-            np.random.randn(n_samples_per_class, 2) + np.array([1, -1]),
-            np.random.randn(n_samples_per_class, 2) + np.array([-1, 1]),
-        ]
-    )
+    X = np.concatenate([
+        np.random.randn(n_samples_per_class, 2) + np.array([1, -1]),
+        np.random.randn(n_samples_per_class, 2) + np.array([-1, 1]),
+    ])
     y = np.array([1] * n_samples_per_class + [-1] * n_samples_per_class)
 
     training_set, test_set = np.split(np.random.permutation(len(X)), [int(len(X) * 0.8)])
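Aside: the WeakEstimator edited above is a decision stump, which predicts from a single feature and threshold. A minimal sketch of that prediction rule, assuming the feature/threshold/sign attributes seen in the diff (the training loop is omitted):

    import numpy as np

    def stump_predict(X: np.ndarray, feature: int, threshold: float, sign: int) -> np.ndarray:
        # Label one side of the threshold +sign and the other side -sign, with sign in {-1, 1}.
        return np.where(X[:, feature] > threshold, sign, -sign)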
39 changes: 18 additions & 21 deletions src/decision_tree.py
@@ -3,7 +3,6 @@
 # @Author: Luokun
 # @Email : olooook@outlook.com
 
-
 import numpy as np
 
 
@@ -70,26 +69,24 @@ def calc_info_gain(cls, X: np.ndarray, y: np.ndarray, rows: np.ndarray, col: int
 
 
 def load_data():
-    X = np.array(
-        [
-            [0, 0, 0],
-            [0, 0, 0],
-            [0, 0, 1],
-            [0, 0, 1],
-            [0, 1, 0],
-            [0, 1, 0],
-            [0, 1, 1],
-            [0, 1, 1],
-            [1, 0, 0],
-            [1, 0, 0],
-            [1, 0, 1],
-            [1, 0, 1],
-            [1, 1, 0],
-            [1, 1, 0],
-            [1, 1, 1],
-            [1, 1, 1],
-        ]
-    )
+    X = np.array([
+        [0, 0, 0],
+        [0, 0, 0],
+        [0, 0, 1],
+        [0, 0, 1],
+        [0, 1, 0],
+        [0, 1, 0],
+        [0, 1, 1],
+        [0, 1, 1],
+        [1, 0, 0],
+        [1, 0, 0],
+        [1, 0, 1],
+        [1, 0, 1],
+        [1, 1, 0],
+        [1, 1, 0],
+        [1, 1, 1],
+        [1, 1, 1],
+    ])
     y = np.where(X.sum(axis=1) >= 2, 1, 0)
     return X, y
 
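Aside: calc_info_gain in the hunk header above computes the information gain of splitting on a column. A sketch of the standard computation under that assumption (illustrative names, not the repo's exact signature):

    import numpy as np

    def entropy(y: np.ndarray) -> float:
        # H(y) = -sum_k p_k * log2(p_k) over the class frequencies of y.
        p = np.bincount(y) / len(y)
        p = p[p > 0]
        return float(-(p * np.log2(p)).sum())

    def info_gain(X: np.ndarray, y: np.ndarray, col: int) -> float:
        # Entropy before the split, minus the size-weighted entropy of each branch.
        gain = entropy(y)
        for value in np.unique(X[:, col]):
            mask = X[:, col] == value
            gain -= mask.mean() * entropy(y[mask])
        return gain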
6 changes: 2 additions & 4 deletions src/em.py
@@ -3,7 +3,6 @@
 # @Author: Luokun
 # @Email : olooook@outlook.com
 
-
 import numpy as np
 
 
@@ -22,8 +21,8 @@ def fit(self, X: np.ndarray, iterations=100):
 
     def _expect(self, X: np.ndarray):  # E-step
         p1, p2, p3 = self.prob
-        a = p1 * (p2 ** X) * ((1 - p2) ** (1 - X))
-        b = (1 - p1) * (p3 ** X) * ((1 - p3) ** (1 - X))
+        a = p1 * (p2**X) * ((1 - p2)**(1 - X))
+        b = (1 - p1) * (p3**X) * ((1 - p3)**(1 - X))
         return a / (a + b)
 
     def _maximize(self, X: np.ndarray, M: np.ndarray):  # M-step
@@ -34,7 +33,6 @@ def _maximize(self, X: np.ndarray, M: np.ndarray):  # M-step
 
 # For the EM algorithm with Gaussian mixture models, see ./gmm.py
 
-
 if __name__ == "__main__":
     x = np.array([1, 1, 0, 1, 0, 0, 1, 0, 1, 1])
 
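Aside: the _expect change above is the E-step of the classic three-coin model: p1 is the probability of choosing the first coin, and p2, p3 are the head probabilities of the two coins. A standalone rendering of one EM iteration on the sample data, with illustrative initial parameters and the standard M-step update (the repo's own M-step lives in _maximize, not shown in full here):

    import numpy as np

    x = np.array([1, 1, 0, 1, 0, 0, 1, 0, 1, 1])
    p1, p2, p3 = 0.5, 0.6, 0.5  # illustrative initial values, not from the repo

    # E-step: responsibility that each flip came from the first coin.
    a = p1 * (p2**x) * ((1 - p2)**(1 - x))
    b = (1 - p1) * (p3**x) * ((1 - p3)**(1 - x))
    mu = a / (a + b)

    # M-step: re-estimate the parameters from the responsibilities.
    p1 = mu.mean()
    p2 = (mu * x).sum() / mu.sum()
    p3 = ((1 - mu) * x).sum() / (1 - mu).sum()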
10 changes: 4 additions & 6 deletions src/gmm.py
@@ -65,12 +65,10 @@ def maximize(self, X: np.ndarray, G: np.ndarray):  # M-step
 
 
 def load_data(n_samples_per_class=500):
-    X = np.concatenate(
-        [
-            np.random.multivariate_normal(mean=[4, 0], cov=[[2, 0], [0, 2]], size=[n_samples_per_class]),
-            np.random.multivariate_normal(mean=[0, 4], cov=[[2, 0], [0, 2]], size=[n_samples_per_class]),
-        ]
-    )
+    X = np.concatenate([
+        np.random.multivariate_normal(mean=[4, 0], cov=[[2, 0], [0, 2]], size=[n_samples_per_class]),
+        np.random.multivariate_normal(mean=[0, 4], cov=[[2, 0], [0, 2]], size=[n_samples_per_class]),
+    ])
     y = np.array([0] * n_samples_per_class + [1] * n_samples_per_class)
     return X, y
 
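Aside: the maximize method in the hunk header is the M-step of EM for a Gaussian mixture; the matching E-step needs the multivariate normal density of each component. A sketch of that density, assuming nothing about the repo's own implementation (which this diff does not show):

    import numpy as np

    def gaussian_pdf(X: np.ndarray, mean: np.ndarray, cov: np.ndarray) -> np.ndarray:
        # N(x | mean, cov), evaluated row-wise over the sample matrix X.
        d = X.shape[1]
        diff = X - mean
        quad = np.einsum("ij,jk,ik->i", diff, np.linalg.inv(cov), diff)  # Mahalanobis term
        norm = np.sqrt((2 * np.pi) ** d * np.linalg.det(cov))
        return np.exp(-0.5 * quad) / norm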
12 changes: 5 additions & 7 deletions src/kmeans.py
@@ -35,13 +35,11 @@ def fit(self, X: np.ndarray):
             y_pred = self(X)
 
             # Use the mean of each cluster as the new centers.
-            centers = np.stack(
-                [
-                    # If any points belong to cluster i, take the mean of those points; otherwise pick a random point as the center of cluster i.
-                    np.mean(X[y_pred == i], axis=0) if np.any(y_pred == i) else random.choice(X)
-                    for i in range(self.n_clusters)
-                ]
-            )
+            centers = np.stack([
+                # If any points belong to cluster i, take the mean of those points; otherwise pick a random point as the center of cluster i.
+                np.mean(X[y_pred == i], axis=0) if np.any(y_pred == i) else random.choice(X)
+                for i in range(self.n_clusters)
+            ])
 
             # Stop iterating once the largest center update is below eps.
             if np.abs(self.centers - centers).max() < self.eps:
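Aside: the reformatted np.stack above is the center-update step; the assignment step it pairs with (y_pred = self(X)) is typically a nearest-center argmin. A sketch under that assumption:

    import numpy as np

    def assign_clusters(X: np.ndarray, centers: np.ndarray) -> np.ndarray:
        # Distance from every point to every center, shape (n_samples, n_clusters).
        dist = np.linalg.norm(X[:, None, :] - centers[None, :, :], axis=-1)
        return dist.argmin(axis=1)  # index of the nearest center for each point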
2 changes: 1 addition & 1 deletion src/knn.py
@@ -27,7 +27,7 @@ def __call__(self, X: np.ndarray):
         y_pred = np.zeros([len(X)], dtype=int)  # predicted class for each sample in X
         for i, x in enumerate(X):
             dist = LA.norm(self.X - x, axis=1)  # distances from x to all points with known classes
-            topk = np.argsort(dist)[: self.k]  # indices of the k nearest points
+            topk = np.argsort(dist)[:self.k]  # indices of the k nearest points
             y_pred[i] = np.bincount(self.y[topk]).argmax()  # the most common class among the neighbors becomes the class of x
         return y_pred
 
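Aside: the np.bincount(...).argmax() idiom on the line after the touched slice is a majority vote over the k nearest neighbors (labels must be non-negative integers). A quick illustration:

    import numpy as np

    labels = np.array([2, 0, 2, 2, 1])  # classes of the k nearest neighbors
    counts = np.bincount(labels)        # array([1, 1, 3]): one vote for 0, one for 1, three for 2
    print(counts.argmax())              # 2, the majority class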
14 changes: 6 additions & 8 deletions src/lda.py
@@ -30,7 +30,7 @@ def fit(self, X: np.ndarray, Y: np.ndarray):
             S_W += (Xi - Mi).T @ (Xi - Mi)
             S_B += len(Xi) * (Mi - M).reshape(-1, 1) @ (Mi - M).reshape(1, -1)
         L, V = LA.eig(LA.inv(S_W) @ S_B)  # eigenvalues and eigenvectors of S_W^{-1} S_B
-        topk = np.argsort(L)[::-1][: self.k]  # sort eigenvalues in descending order and take the top k
+        topk = np.argsort(L)[::-1][:self.k]  # sort eigenvalues in descending order and take the top k
         self.W = V[:, topk]  # eigenvectors corresponding to the top-k eigenvalues
 
     def __call__(self, X: np.ndarray):
@@ -52,20 +52,18 @@ def __init__(self, k: int):
     def __call__(self, X: np.ndarray):
         X_norm = X - X.mean(axis=0)  # center the data
         L, V = np.linalg.eig(X_norm.T @ X_norm)  # eigendecomposition of the covariance matrix
-        topk = np.argsort(L)[::-1][: self.k]  # indices of the top-k eigenvalues
+        topk = np.argsort(L)[::-1][:self.k]  # indices of the top-k eigenvalues
         return X_norm @ V[:, topk]  # project the centered X onto the top-k eigenvectors
 
 
 def load_data(n_samples_per_class=500):
     theta = np.pi / 4
     scale = np.array([[2, 0], [0, 0.5]])  # scaling
     rotate = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])  # rotation
-    X = np.concatenate(
-        [
-            np.random.randn(n_samples_per_class, 2) + np.array([0, -2]),
-            np.random.randn(n_samples_per_class, 2) + np.array([0, +2]),
-        ]
-    )
+    X = np.concatenate([
+        np.random.randn(n_samples_per_class, 2) + np.array([0, -2]),
+        np.random.randn(n_samples_per_class, 2) + np.array([0, +2]),
+    ])
     X = X @ scale @ rotate  # scale and rotate the data
     y = np.array([0] * n_samples_per_class + [1] * n_samples_per_class)
     return X, y
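Aside: the touched topk lines in lda.py (and again in pca.py below) all use the same idiom: np.argsort sorts ascending, so reversing with [::-1] and slicing [:k] yields the indices of the k largest eigenvalues. A small worked example:

    import numpy as np

    L = np.array([0.5, 3.0, 1.2])   # eigenvalues
    topk = np.argsort(L)[::-1][:2]  # argsort -> [0, 2, 1]; reversed -> [1, 2, 0]; top 2 -> [1, 2]
    print(topk)                     # [1 2]: indices of the two largest eigenvalues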
12 changes: 5 additions & 7 deletions src/logistic_regression.py
@@ -41,12 +41,10 @@ def sigmoid(x):
 
 
 def load_data(n_samples_per_class=500):
-    X = np.concatenate(
-        [
-            np.random.randn(n_samples_per_class, 2) + np.array([1, -1]),
-            np.random.randn(n_samples_per_class, 2) + np.array([-1, 1]),
-        ]
-    )
+    X = np.concatenate([
+        np.random.randn(n_samples_per_class, 2) + np.array([1, -1]),
+        np.random.randn(n_samples_per_class, 2) + np.array([-1, 1]),
+    ])
     y = np.array([0] * n_samples_per_class + [1] * n_samples_per_class)
 
     training_set, test_set = np.split(np.random.permutation(len(X)), [int(len(X) * 0.6)])
@@ -58,7 +56,7 @@ def train_logistic_regression(model, X, y, epochs=100, batch_size=32):
     for _ in range(epochs):
         np.random.shuffle(indices)
         for i in range(batch_size, len(X) + 1, batch_size):
-            model.fit(X[indices[i - batch_size : i]], y[indices[i - batch_size : i]])
+            model.fit(X[indices[i - batch_size:i]], y[indices[i - batch_size:i]])
 
 
 if __name__ == "__main__":
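Aside: the reformatted slice in train_logistic_regression walks through the shuffled indices one mini-batch at a time; a trailing remainder smaller than batch_size is silently dropped. The pattern in isolation:

    import numpy as np

    indices = np.random.permutation(100)
    batch_size = 32
    for i in range(batch_size, len(indices) + 1, batch_size):  # i = 32, 64, 96
        batch = indices[i - batch_size:i]  # slices [0:32], [32:64], [64:96]; the last 4 indices are skipped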
5 changes: 2 additions & 3 deletions src/naive_bayes.py
@@ -28,9 +28,8 @@ def fit(self, X: np.ndarray, y: np.ndarray):
     def __call__(self, X: np.ndarray) -> np.ndarray:
         y_pred = np.zeros([len(X)], dtype=int)
         for i, x in enumerate(X):
-            P = np.log(self.P_prior) + np.array(
-                [np.log(p_cond[np.arange(len(x)), x]).sum() for p_cond in self.P_cond]
-            )  # log of the prior probability plus the log of the conditional probabilities
+            # Log of the prior probability plus the log of the conditional probabilities.
+            P = np.log(self.P_prior) + np.array([np.log(p_cond[np.arange(len(x)), x]).sum() for p_cond in self.P_cond])
             y_pred[i] = np.argmax(P)
         return y_pred
 
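Aside: the rewritten line gathers one conditional probability per feature with p_cond[np.arange(len(x)), x] (row j, column x[j] of a per-class table) and sums logs rather than multiplying raw probabilities, which avoids numerical underflow. The indexing in isolation, on an illustrative 3-feature table:

    import numpy as np

    # p_cond[j, v] = P(feature j takes value v | class); 3 features, 2 values each.
    p_cond = np.array([[0.9, 0.1],
                       [0.4, 0.6],
                       [0.7, 0.3]])
    x = np.array([0, 1, 1])
    probs = p_cond[np.arange(len(x)), x]  # [0.9, 0.6, 0.3]
    log_likelihood = np.log(probs).sum()  # log P(x | class) under the independence assumption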
14 changes: 6 additions & 8 deletions src/pca.py
@@ -22,18 +22,16 @@ def __init__(self, k: int):
     def __call__(self, X: np.ndarray):
         X_norm = X - X.mean(axis=0)  # center the data
        L, V = np.linalg.eig(X_norm.T @ X_norm)  # eigendecomposition of the covariance matrix
-        topk = np.argsort(L)[::-1][: self.k]  # indices of the top-k eigenvalues
+        topk = np.argsort(L)[::-1][:self.k]  # indices of the top-k eigenvalues
         return X_norm @ V[:, topk]  # project the centered X onto the top-k eigenvectors
 
 
 def load_data(n_samples_per_class=200):
-    X = np.concatenate(
-        [
-            np.random.randn(n_samples_per_class, 2) + np.array([2, 0]),
-            np.random.randn(n_samples_per_class, 2),
-            np.random.randn(n_samples_per_class, 2) + np.array([-2, 0]),
-        ]
-    )
+    X = np.concatenate([
+        np.random.randn(n_samples_per_class, 2) + np.array([2, 0]),
+        np.random.randn(n_samples_per_class, 2),
+        np.random.randn(n_samples_per_class, 2) + np.array([-2, 0]),
+    ])
     theta = np.pi / 4  # rotate 45° counterclockwise
     scale = np.diag([1.2, 0.5])  # scaling matrix
     rotate = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])  # rotation matrix
10 changes: 4 additions & 6 deletions src/perceptron.py
@@ -36,12 +36,10 @@ def pad(x):
 
 
 def load_data(n_samples_per_class=500):
-    X = np.concatenate(
-        [
-            np.random.randn(n_samples_per_class, 2) + np.array([1, -1]),
-            np.random.randn(n_samples_per_class, 2) + np.array([-1, 1]),
-        ]
-    )
+    X = np.concatenate([
+        np.random.randn(n_samples_per_class, 2) + np.array([1, -1]),
+        np.random.randn(n_samples_per_class, 2) + np.array([-1, 1]),
+    ])
     y = np.array([-1] * n_samples_per_class + [1] * n_samples_per_class)
 
     training_set, test_set = np.split(np.random.permutation(len(X)), [int(len(X) * 0.6)])
1 change: 0 additions & 1 deletion src/utils.py
@@ -5,7 +5,6 @@
 # @Author: Kun Luo
 # @Email : olooook@outlook.com
 
-
 from typing import List, Optional
 
 import numpy as np
