[go: nahoru, domu]

Skip to content

Commit

Permalink
更新绘图
Browse files Browse the repository at this point in the history
  • Loading branch information
luokn committed May 21, 2022
1 parent fccc5d7 commit a4135c2
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 45 deletions.
26 changes: 13 additions & 13 deletions src/lda.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,40 +60,40 @@ def load_data():
theta = np.pi / 4
scale = np.array([[2, 0], [0, 0.8]]) # 缩放
rotate = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) # 旋转
x = np.stack([np.random.randn(500, 2) + [0, 2], np.random.randn(500, 2) - [0, 2]]) @ scale @ rotate
X = np.stack([np.random.randn(500, 2) + [0, 2], np.random.randn(500, 2) - [0, 2]]) @ scale @ rotate
y = np.stack([np.full([500], 0), np.full([500], 1)])
return x, y
return X, y


if __name__ == "__main__":
    # Demo script: project the synthetic two-class data to 1-D with LDA and
    # with PCA and plot the three views side by side.
    # X is stacked per class here: X[0] is class 0, X[1] is class 1.
    X, y = load_data()
    plt.figure(figsize=[18, 6])

    # Panel 1: the raw 2-D samples, one scatter call per class.
    plt.subplot(1, 3, 1)
    plt.title("Ground Truth")
    plt.xlim(-5, 5)
    plt.ylim(-5, 5)
    for class_points in X:
        plt.scatter(class_points[:, 0], class_points[:, 1], marker=".")

    # Flatten the per-class stacking into a (samples, 2) matrix and a label vector.
    X, y = X.reshape(-1, 2), y.reshape(-1)

    # Panel 2: 1-D LDA projection, drawn along the horizontal axis.
    lda = LDA(1)
    lda.fit(X, y)
    Z = lda(X)
    plt.subplot(1, 3, 2)
    plt.title("LDA")
    plt.xlim(-5, 5)
    plt.ylim(-5, 5)
    for label in (0, 1):
        # Select by label rather than by a hard-coded 500-row split.
        projected = Z[y == label, 0]
        plt.scatter(projected, np.zeros_like(projected), marker=".")

    # Panel 3: the same view with PCA, for comparison with LDA.
    pca = PCA(1)
    Z = pca(X)
    plt.subplot(1, 3, 3)
    plt.title("PCA")
    plt.xlim(-5, 5)
    plt.ylim(-5, 5)
    for label in (0, 1):
        projected = Z[y == label, 0]
        plt.scatter(projected, np.zeros_like(projected), marker=".")
    plt.show()
75 changes: 43 additions & 32 deletions src/logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,24 @@ class LogisticRegression:
Logistic regression classifier(逻辑斯蒂回归分类器)
"""

def __init__(self, input_dim: int, lr: float = 5e-4):
    """Create the classifier with randomly initialised parameters.

    Args:
        input_dim (int): number of input features.
        lr (float): learning rate used by ``fit``; defaults to 5e-4.
    """
    # One weight per feature plus one extra entry for the bias term.
    self.weights = np.random.randn(input_dim + 1)
    self.lr = lr

def fit(self, X: np.ndarray, y: np.ndarray) -> None:
    """Run a single gradient-descent step on the batch ``(X, y)``.

    Args:
        X (np.ndarray): batch of samples, one row per sample.
        y (np.ndarray): target labels for the batch, one per row of ``X``.
    """
    # pad() is defined elsewhere in this file; per the original comment it
    # fills X with ones so the bias is learned as an ordinary weight.
    X_pad = pad(X)
    pred = sigmoid(X_pad @ self.weights)  # predicted probabilities
    grad = X_pad.T @ (pred - y) / len(pred)  # gradient averaged over the batch
    self.weights -= self.lr * grad  # step along the negative gradient

def __call__(self, X: np.ndarray) -> np.ndarray:
    """Predict hard {0, 1} labels for the rows of ``X``.

    Args:
        X (np.ndarray): samples to classify, one row per sample.

    Returns:
        np.ndarray: 1 where the predicted probability exceeds 0.5, else 0.
    """
    y_pred = sigmoid(pad(X) @ self.weights)  # predicted probabilities
    return np.where(y_pred > 0.5, 1, 0)  # threshold probabilities into labels


def pad(x):
Expand All @@ -38,48 +40,57 @@ def sigmoid(x):
return 1 / (1 + np.exp(-x))


def load_data(n_samples_per_class=500, train_ratio=0.6):
    """Generate a synthetic two-class 2-D dataset with a random train/test split.

    Class 0 is standard-normal noise shifted by (1, -1); class 1 is shifted
    by (-1, 1). Samples are returned grouped by class (all of class 0 first).

    Args:
        n_samples_per_class (int): number of samples drawn for each class.
        train_ratio (float): fraction of all samples assigned to the training
            split; the remainder forms the test split. Defaults to 0.6.

    Returns:
        tuple: ``(X, y, training_set, test_set)`` where ``X`` has shape
        ``(2 * n_samples_per_class, 2)``, ``y`` holds the 0/1 labels, and
        ``training_set`` / ``test_set`` are disjoint index arrays into ``X``.
    """
    class_centers = (np.array([1, -1]), np.array([-1, 1]))
    X = np.concatenate(
        [np.random.randn(n_samples_per_class, 2) + center for center in class_centers]
    )
    y = np.concatenate([np.full([n_samples_per_class], label) for label in (0, 1)])

    # Shuffle all sample indices once, then cut them at the train/test boundary.
    n_train = int(len(X) * train_ratio)
    training_set, test_set = np.split(np.random.permutation(len(X)), [n_train])

    return X, y, training_set, test_set


def train_logistic_regression(model, X, y, epochs, batch_size=32):
    """Train ``model`` with mini-batch updates over reshuffled data.

    Each epoch shuffles the sample order and calls ``model.fit`` once per
    mini-batch. The final batch of an epoch may be smaller than
    ``batch_size``; it is still used, so no samples are skipped and datasets
    smaller than ``batch_size`` are still trained on.

    Args:
        model: object exposing ``fit(X_batch, y_batch)``.
        X: samples, indexable by an integer index array.
        y: labels aligned with ``X``.
        epochs (int): number of passes over the data.
        batch_size (int): maximum number of samples per ``fit`` call.
    """
    indices = np.arange(len(X))
    for _ in range(epochs):
        np.random.shuffle(indices)  # new sample order every epoch
        for start in range(0, len(X), batch_size):
            batch = indices[start : start + batch_size]
            model.fit(X[batch], y[batch])


if __name__ == "__main__":
    # Demo script: fit the classifier on the training split, report accuracy
    # on the held-out test split, and plot ground truth next to predictions.
    X, y, training_set, test_set = load_data()

    # Left panel: samples grouped by their true label.
    X_0, X_1 = X[y == 0], X[y == 1]
    plt.figure(figsize=[12, 6])
    plt.subplot(1, 2, 1)
    plt.title("Ground Truth")
    plt.xlim(-4, 4)
    plt.ylim(-4, 4)
    plt.scatter(X_0[:, 0], X_0[:, 1], marker=".")
    plt.scatter(X_1[:, 0], X_1[:, 1], marker=".")

    logistic_regression = LogisticRegression(2)
    # Train only on the training split so the test accuracy below is measured
    # on samples the model has never seen.
    train_logistic_regression(
        logistic_regression, X[training_set], y[training_set], epochs=500
    )
    y_pred = logistic_regression(X)
    acc = np.sum(y_pred[test_set] == y[test_set]) / len(test_set)
    print(f"Accuracy = {100 * acc:.2f}%")

    # Right panel: samples grouped by their predicted label.
    X_0, X_1 = X[y_pred == 0], X[y_pred == 1]
    plt.subplot(1, 2, 2)
    plt.title("Prediction")
    plt.xlim(-4, 4)
    plt.ylim(-4, 4)
    plt.scatter(X_0[:, 0], X_0[:, 1], marker=".")
    plt.scatter(X_1[:, 0], X_1[:, 1], marker=".")

    # Decision boundary: w[0] * x + w[1] * y + w[2] = 0, solved for y.
    w = logistic_regression.weights
    a, b = -w[0] / w[1], -w[2] / w[1]
    line_x = np.linspace(-4, 4, 400)
    line_y = a * line_x + b
    plt.plot(line_x, line_y, c="b", lw=1)
    plt.show()

0 comments on commit a4135c2

Please sign in to comment.