Logistic regression, also called logistic regression analysis, is a generalized linear regression model commonly used in data mining, automatic disease diagnosis, economic forecasting, and similar fields. The logistic-family models covered here are alike in that every one of them minimizes some variant of the logistic loss shown in Equation 1; they differ mainly in the loss function, i.e., in the penalty term added to that loss.
Loss
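Reading the losses off the cal_loss implementations below, with labels $y_i \in \{0, 1\}$, feature vectors $x_i$, and $N$ samples in the batch, the plain logistic loss (Equation 1) is

$$\mathcal{L}(w) = \frac{1}{N} \sum_{i=1}^{N} \left[ \log\left(1 + e^{x_i^\top w}\right) - y_i \, x_i^\top w \right] \qquad (1)$$

and the regularized variants add a penalty term, scaled exactly as the code computes it:

- Ridge: $\mathcal{L}(w) + \dfrac{\lambda}{2N} \lVert w \rVert_2^2$
- Lasso: $\mathcal{L}(w) + \dfrac{\lambda}{N} \lVert w \rVert_1$
- Kernel: the linear score $x_i^\top w$ is replaced by $k_i^\top \alpha$, where $k_i$ is the $i$-th row of the kernel matrix and $\alpha$ holds one dual weight per training sample, plus an unaveraged L1 penalty $\lambda \lVert \alpha \rVert_1$.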
Python Code
Logistic Regression
# Name: logistic_regression
# Author: Reacubeth
# Time: 2021/5/9 14:36
# Mail: noverfitting@gmail.com
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
import numpy as np


class LogisticRegression:
    def __init__(self, name, batch_size, learning_rate, max_iter, optimizer):
        super().__init__()
        self.W = None
        self.name = name
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.optimizer = optimizer.lower()

    def fit(self, feature, label, verbose=False):
        num, dim = feature.shape
        self.W = np.random.normal(0, 1, (dim, ))
        for t in range(self.max_iter):
            if self.optimizer == 'sgd':
                # mini-batch SGD: estimate loss and gradient on a random batch
                rand_pos = np.random.choice(num, self.batch_size)
                loss = self.cal_loss(feature[rand_pos, :], label[rand_pos])
                grad = self.cal_grad(feature[rand_pos, :], label[rand_pos])
            else:
                # full-batch gradient descent
                loss = self.cal_loss(feature, label)
                grad = self.cal_grad(feature, label)
            if verbose:
                print(self.name, '@epoch', t, ' loss: ', loss)
            self.W = self.W - self.learning_rate * grad

    def predict(self, feature, probability=False):
        if probability:
            return 1 / (1 + np.exp(-np.dot(feature, self.W)))
        else:
            return (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)

    def score(self, feature, label):
        pred_label = (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)
        return np.where(pred_label == label)[0].shape[0] / feature.shape[0]

    def cal_loss(self, feature, label):
        # negative log-likelihood for labels in {0, 1}, averaged over the batch
        num, dim = feature.shape
        return (np.sum(np.log(1 + np.exp(np.dot(feature, self.W)))) - np.dot(label, np.dot(feature, self.W))) / num

    def cal_grad(self, feature, label):
        # gradient: X^T (sigmoid(XW) - y) / num
        num, dim = feature.shape
        return np.dot(feature.T, 1.0 / (1.0 + np.exp(-np.dot(feature, self.W))) - label) / num
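A minimal usage sketch (the synthetic two-blob data and all hyper-parameters below are my own illustrative assumptions, not part of the original code):

import numpy as np

np.random.seed(0)
# toy binary problem: two Gaussian blobs on either side of the origin
X = np.vstack([np.random.normal(2.0, 1.0, (100, 2)),
               np.random.normal(-2.0, 1.0, (100, 2))])
y = np.hstack([np.ones(100, dtype=int), np.zeros(100, dtype=int)])

clf = LogisticRegression(name='LR', batch_size=32, learning_rate=0.1,
                         max_iter=500, optimizer='sgd')
clf.fit(X, y)
print('train accuracy:', clf.score(X, y))

Note that the model has no bias term, so data that is not separable through the origin should be augmented with a constant-1 feature column.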
Ridge Logistic Regression
# Name: ridge_logistic_regression
# Author: Reacubeth
# Time: 2021/5/9 19:36
# Mail: noverfitting@gmail.com
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
import numpy as np
from numpy import linalg


class RidgeLogisticRegression:
    def __init__(self, name, batch_size, learning_rate, max_iter, optimizer, lambda_):
        super().__init__()
        self.W = None
        self.name = name
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.optimizer = optimizer.lower()
        self.lambda_ = lambda_  # strength of the L2 penalty

    def fit(self, feature, label, verbose=False):
        num, dim = feature.shape
        self.W = np.random.normal(0, 1, (dim, ))
        for t in range(self.max_iter):
            if self.optimizer == 'sgd':
                rand_pos = np.random.choice(num, self.batch_size)
                loss = self.cal_loss(feature[rand_pos, :], label[rand_pos])
                grad = self.cal_grad(feature[rand_pos, :], label[rand_pos])
            else:
                loss = self.cal_loss(feature, label)
                grad = self.cal_grad(feature, label)
            if verbose:
                print(self.name, '@epoch', t, ' loss: ', loss)
            self.W = self.W - self.learning_rate * grad

    def predict(self, feature, probability=False):
        if probability:
            return 1 / (1 + np.exp(-np.dot(feature, self.W)))
        else:
            return (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)

    def score(self, feature, label):
        pred_label = (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)
        return np.where(pred_label == label)[0].shape[0] / feature.shape[0]

    def cal_loss(self, feature, label):
        num, dim = feature.shape
        logistic_loss = np.sum(np.log(1 + np.exp(np.dot(feature, self.W)))) - np.dot(label, np.dot(feature, self.W))
        # equivalent form for labels in {-1, +1}:
        # yxw = label * np.dot(feature, self.W)
        # return (np.sum(np.log(1 + np.exp(-yxw))) + self.lambda_ / 2 * linalg.norm(self.W) ** 2) / num
        return (logistic_loss + self.lambda_ / 2 * linalg.norm(self.W) ** 2) / num

    def cal_grad(self, feature, label):
        num, dim = feature.shape
        return (np.dot(feature.T, 1.0 / (1.0 + np.exp(-np.dot(feature, self.W))) - label) + self.lambda_ * self.W) / num
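Since cal_grad is meant to be the exact gradient of cal_loss, a quick finite-difference comparison is a useful sanity test. This sketch is my own addition; the data and settings are arbitrary:

import numpy as np

np.random.seed(1)
X = np.random.normal(0, 1, (50, 3))
y = (np.random.rand(50) > 0.5).astype(int)

model = RidgeLogisticRegression(name='ridge', batch_size=16, learning_rate=0.1,
                                max_iter=1, optimizer='gd', lambda_=0.5)
model.fit(X, y)  # one full-batch step, just to initialize model.W

analytic = model.cal_grad(X, y)
numeric = np.zeros_like(model.W)
eps = 1e-6
W0 = model.W.copy()
for j in range(W0.shape[0]):
    model.W = W0.copy()
    model.W[j] += eps
    loss_plus = model.cal_loss(X, y)
    model.W = W0.copy()
    model.W[j] -= eps
    loss_minus = model.cal_loss(X, y)
    numeric[j] = (loss_plus - loss_minus) / (2 * eps)  # central difference
model.W = W0
print('max abs difference:', np.abs(analytic - numeric).max())  # should be tiny, ~1e-8 or smaller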
Lasso Logistic Regression
# Name: lasso_logistic_regression
# Author: Reacubeth
# Time: 2021/5/9 20:17
# Mail: noverfitting@gmail.com
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
import numpy as np
from numpy import linalg


class LassoLogisticRegression:
    def __init__(self, name, batch_size, learning_rate, max_iter, optimizer, lambda_):
        super().__init__()
        self.W = None
        self.name = name
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.optimizer = optimizer.lower()
        self.lambda_ = lambda_  # strength of the L1 penalty

    def fit(self, feature, label, verbose=False):
        num, dim = feature.shape
        self.W = np.random.normal(0, 1, (dim,))
        for t in range(self.max_iter):
            if self.optimizer == 'sgd':
                rand_pos = np.random.choice(num, self.batch_size)
                loss = self.cal_loss(feature[rand_pos, :], label[rand_pos])
                grad = self.cal_grad(feature[rand_pos, :], label[rand_pos])
            else:
                loss = self.cal_loss(feature, label)
                grad = self.cal_grad(feature, label)
            if verbose:
                print(self.name, '@epoch', t, ' loss: ', loss)
            self.W = self.W - self.learning_rate * grad

    def predict(self, feature, probability=False):
        if probability:
            return 1 / (1 + np.exp(-np.dot(feature, self.W)))
        else:
            return (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)

    def score(self, feature, label):
        pred_label = (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)
        return np.where(pred_label == label)[0].shape[0] / feature.shape[0]

    def cal_loss(self, feature, label):
        num, dim = feature.shape
        return ((np.sum(np.log(1 + np.exp(np.dot(feature, self.W)))) - np.dot(label, np.dot(feature, self.W)))
                + self.lambda_ * linalg.norm(self.W, ord=1)) / num

    def cal_grad(self, feature, label):
        # the L1 term is handled with its subgradient, sign(W)
        num, dim = feature.shape
        return (np.dot(feature.T, 1.0 / (1.0 + np.exp(-np.dot(feature, self.W))) - label)
                + self.lambda_ * np.sign(self.W)) / num
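A sketch of the effect of the L1 penalty (synthetic data and hyper-parameters are illustrative assumptions). Because the update uses the subgradient sign(W), irrelevant weights are driven toward zero but keep oscillating slightly around it rather than becoming exactly zero; a proximal soft-thresholding update would yield exact zeros:

import numpy as np

np.random.seed(2)
X = np.random.normal(0, 1, (200, 10))
true_w = np.array([2.0, -2.0] + [0.0] * 8)   # only the first 2 features matter
y = (X.dot(true_w) > 0).astype(int)

lasso = LassoLogisticRegression(name='lasso', batch_size=32, learning_rate=0.1,
                                max_iter=2000, optimizer='sgd', lambda_=1.0)
lasso.fit(X, y)
print('train accuracy:', lasso.score(X, y))
print('learned weights:', np.round(lasso.W, 2))  # the 8 noise weights should be shrunk near 0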
Kernel Logistic Regression
# Name: kernel_logistic_regression_lasso
# Author: Reacubeth
# Time: 2021/5/9 23:43
# Mail: noverfitting@gmail.com
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
import numpy as np
from numpy import linalg


class KernelLogisticRegression:
    def __init__(self, name, kernel, batch_size, learning_rate, max_iter, optimizer, lambda_, kernel_para):
        super().__init__()
        self.W = None              # dual coefficients, one per training sample
        self.train_feature = None  # training set, needed to build kernel matrices at test time
        self.name = name
        self.kernel = kernel.lower()
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.optimizer = optimizer.lower()
        self.lambda_ = lambda_
        self.kernel_para = kernel_para

    def _kernel_matrix(self, X, Y):
        # dispatch to the configured kernel function
        if self.kernel == 'rbf':
            return self.kernel_rbf(X, Y, self.kernel_para)
        elif self.kernel == 'poly':
            return self.kernel_poly(X, Y, self.kernel_para)
        elif self.kernel == 'cosine':
            return self.kernel_cosine(X, Y)
        else:
            raise NotImplementedError

    def fit(self, feature, label, verbose=False):
        num, dim = feature.shape
        self.train_feature = feature
        self.W = np.random.normal(0, 1, (num,))  # one dual weight per training sample
        for t in range(self.max_iter):
            if self.optimizer == 'sgd':
                rand_pos = np.random.choice(num, self.batch_size)
                K_batch = self._kernel_matrix(feature[rand_pos, :], feature)
                loss = self.cal_loss(K_batch, label[rand_pos])
                grad = self.cal_grad(K_batch, label[rand_pos])
            else:
                K_full = self._kernel_matrix(feature, feature)
                loss = self.cal_loss(K_full, label)
                grad = self.cal_grad(K_full, label)
            if verbose:
                print(self.name, '@epoch', t, ' loss: ', loss)
            self.W = self.W - self.learning_rate * grad

    def predict(self, feature, probability=False):
        # kernel between the test points and the stored training set
        K_test = self._kernel_matrix(feature, self.train_feature)
        if probability:
            return 1 / (1 + np.exp(-np.dot(K_test, self.W)))
        else:
            return (1 / (1 + np.exp(-np.dot(K_test, self.W))) > 0.5).astype(int)

    def score(self, feature, label):
        pred_label = self.predict(feature)
        return np.where(pred_label == label)[0].shape[0] / feature.shape[0]

    def cal_loss(self, feature, label):
        # here `feature` is a kernel matrix with one column per training sample
        num, dim = feature.shape
        logistic_loss = np.sum(np.log(1 + np.exp(np.dot(feature, self.W)))) - np.dot(label, np.dot(feature, self.W))
        return logistic_loss / num + self.lambda_ * linalg.norm(self.W, ord=1)

    def cal_grad(self, feature, label):
        num, dim = feature.shape
        logistic_grad = np.dot(feature.T, 1.0 / (1.0 + np.exp(-np.dot(feature, self.W))) - label)
        return logistic_grad / num + self.lambda_ * np.sign(self.W)

    @staticmethod
    def kernel_rbf(X, Y, sigma):
        norm_X = linalg.norm(X, axis=1) ** 2
        norm_Y = linalg.norm(Y, axis=1) ** 2
        return np.exp(-(norm_X[:, None] + norm_Y[None, :] - 2 * np.dot(X, Y.T)) / (2 * sigma ** 2))

    @staticmethod
    def kernel_poly(X, Y, d):
        return np.dot(X, Y.T) ** d

    @staticmethod
    def kernel_cosine(X, Y):
        # cosine similarity: divide by the norms themselves, not their squares
        norm_X = linalg.norm(X, axis=1)
        norm_Y = linalg.norm(Y, axis=1)
        return np.dot(X, Y.T) / (norm_X[:, None] * norm_Y[None, :])
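A usage sketch with the RBF kernel on data that is not linearly separable (the concentric-circles data and hyper-parameters are illustrative assumptions):

import numpy as np

np.random.seed(3)
theta = np.random.rand(200) * 2 * np.pi
r = np.hstack([np.random.rand(100) * 0.5,          # inner disk  -> class 0
               1.0 + np.random.rand(100) * 0.5])   # outer ring  -> class 1
X = np.c_[r * np.cos(theta), r * np.sin(theta)]
y = np.hstack([np.zeros(100, dtype=int), np.ones(100, dtype=int)])

klr = KernelLogisticRegression(name='klr', kernel='rbf', batch_size=32,
                               learning_rate=0.1, max_iter=1000,
                               optimizer='sgd', lambda_=0.01, kernel_para=0.5)
klr.fit(X, y)
print('train accuracy:', klr.score(X, y))

With the L1 penalty on the dual weights, many entries of klr.W tend to end up near zero, so only a subset of the training points effectively contributes to predictions.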