Implementing Logistic Regression and Its Variants

Logistic regression, also known as the logit model, is a generalized linear model commonly used in data mining, automated disease diagnosis, economic forecasting, and similar fields. What the logistic-type models implemented here have in common is that each of them minimizes some variant of the logistic loss shown in Equation 1.

The models differ mainly in the loss function each one minimizes: ridge adds an L2 penalty, lasso an L1 penalty, and the kernel variant replaces the raw features with a kernel matrix.
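Written out from the implementations below, with labels y_i ∈ {0, 1}, feature rows x_i, n samples, and weights W, the base logistic loss (Equation 1) and its penalized variants are:

\ell(W) = \frac{1}{n}\sum_{i=1}^{n}\Big[\log\big(1 + e^{x_i^\top W}\big) - y_i\,x_i^\top W\Big] \tag{1}

\ell_{\mathrm{ridge}}(W) = \ell(W) + \frac{\lambda}{2n}\lVert W\rVert_2^2

\ell_{\mathrm{lasso}}(W) = \ell(W) + \frac{\lambda}{n}\lVert W\rVert_1

\ell_{\mathrm{kernel}}(W) = \frac{1}{n}\sum_{i=1}^{n}\Big[\log\big(1 + e^{K_i W}\big) - y_i\,K_i W\Big] + \lambda\lVert W\rVert_1

where K_i is the i-th row of the kernel matrix over the training samples.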

Logistic Regression

# Name: logistic_regression
# Author: Reacubeth
# Time: 2021/5/9 14:36
# Mail: noverfitting@gmail.com
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
 
import numpy as np
 
 
class LogisticRegression:
    def __init__(self, name, batch_size, learning_rate, max_iter, optimizer):
        super().__init__()
        self.W = None
        self.name = name
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.optimizer = optimizer.lower()
 
    def fit(self, feature, label, verbose=False):
        num, dim = feature.shape
        self.W = np.random.normal(0, 1, (dim, ))  # random Gaussian initialization

        for t in range(self.max_iter):
            if self.optimizer == 'sgd':
                # mini-batch SGD: sample a batch (with replacement) each iteration
                rand_pos = np.random.choice(num, self.batch_size)
                loss = self.cal_loss(feature[rand_pos, :], label[rand_pos])
                grad = self.cal_grad(feature[rand_pos, :], label[rand_pos])
            else:
                # full-batch gradient descent
                loss = self.cal_loss(feature, label)
                grad = self.cal_grad(feature, label)
            if verbose:
                print(self.name, '@epoch', t, ' loss: ', loss)
            self.W = self.W - self.learning_rate * grad
 
    def predict(self, feature, probability=False):
        # sigmoid(XW) gives P(y = 1 | x); threshold at 0.5 for hard labels
        if probability:
            return 1 / (1 + np.exp(-np.dot(feature, self.W)))
        else:
            return (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)

    def score(self, feature, label):
        # classification accuracy
        pred_label = (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)
        return np.where(pred_label == label)[0].shape[0] / feature.shape[0]

    def cal_loss(self, feature, label):
        # averaged negative log-likelihood for labels in {0, 1} (Equation 1)
        num, dim = feature.shape
        return (np.sum(np.log(1 + np.exp(np.dot(feature, self.W)))) - np.dot(label, np.dot(feature, self.W))) / num

    def cal_grad(self, feature, label):
        # gradient of Equation 1: X^T (sigmoid(XW) - y) / n
        num, dim = feature.shape
        return np.dot(feature.T, 1.0 / (1.0 + np.exp(-np.dot(feature, self.W))) - label) / num
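A minimal usage sketch (the synthetic blobs and hyperparameter values here are illustrative, not part of the original code):

import numpy as np

np.random.seed(0)
# two Gaussian blobs with labels in {0, 1}; the model learns no intercept,
# so the classes are placed symmetrically around the origin
X = np.vstack([np.random.normal(-2, 1, (100, 2)),
               np.random.normal(2, 1, (100, 2))])
y = np.hstack([np.zeros(100), np.ones(100)]).astype(int)

clf = LogisticRegression('lr', batch_size=32, learning_rate=0.1,
                         max_iter=500, optimizer='sgd')
clf.fit(X, y)
print('accuracy:', clf.score(X, y))  # should be close to 1.0

Since no bias term is learned, the usual way to add an intercept is to append a constant column of ones to the features.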

Ridge Logistic Regression

# Name: ridge_logistic_regression
# Author: Reacubeth
# Time: 2021/5/9 19:36
# Mail: noverfitting@gmail.com
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
 
import numpy as np
from numpy import linalg
 
 
class RidgeLogisticRegression:
    def __init__(self, name, batch_size, learning_rate, max_iter, optimizer, lambda_):
        super().__init__()
        self.W = None
        self.name = name
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.optimizer = optimizer.lower()
        self.lambda_ = lambda_
 
    def fit(self, feature, label, verbose=False):
        num, dim = feature.shape
        self.W = np.random.normal(0, 1, (dim, ))
 
        for t in range(self.max_iter):
            if self.optimizer == 'sgd':
                rand_pos = np.random.choice(num, self.batch_size)
                loss = self.cal_loss(feature[rand_pos, :], label[rand_pos])
                grad = self.cal_grad(feature[rand_pos, :], label[rand_pos])
            else:
                loss = self.cal_loss(feature, label)
                grad = self.cal_grad(feature, label)
            if verbose:
                print(self.name, '@epoch', t, ' loss: ', loss)
            self.W = self.W - self.learning_rate * grad
 
    def predict(self, feature, probability=False):
        if probability:
            return 1 / (1 + np.exp(-np.dot(feature, self.W)))
        else:
            return (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)

    def score(self, feature, label):
        pred_label = (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)
        return np.where(pred_label == label)[0].shape[0] / feature.shape[0]

    def cal_loss(self, feature, label):
        num, dim = feature.shape
        logistic_loss = np.sum(np.log(1 + np.exp(np.dot(feature, self.W)))) - np.dot(label, np.dot(feature, self.W))
        # equivalent form for labels in {-1, +1}:
        # yxw = label * np.dot(feature, self.W)  # elementwise
        # return (np.sum(np.log(1 + np.exp(-yxw))) + self.lambda_ / 2 * linalg.norm(self.W) ** 2) / num
        return (logistic_loss + self.lambda_ / 2 * linalg.norm(self.W) ** 2) / num  # L2 penalty

    def cal_grad(self, feature, label):
        num, dim = feature.shape
        # extra term lambda * W comes from the L2 penalty
        return (np.dot(feature.T, 1.0 / (1.0 + np.exp(-np.dot(feature, self.W))) - label) + self.lambda_ * self.W) / num
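A quick, illustrative sanity check: on the same data, increasing lambda_ should shrink the weight norm. The data and values below are chosen only to make the effect visible:

import numpy as np
from numpy import linalg

np.random.seed(0)
X = np.random.normal(0, 1, (200, 5))
true_w = np.array([2.0, -3.0, 0.5, 0.0, 1.0])
y = (np.dot(X, true_w) > 0).astype(int)  # linearly separable labels

for lam in [0.0, 1.0, 10.0]:
    clf = RidgeLogisticRegression('ridge', batch_size=32, learning_rate=0.1,
                                  max_iter=1000, optimizer='gd', lambda_=lam)
    clf.fit(X, y)
    print('lambda =', lam, ' ||W|| =', round(linalg.norm(clf.W), 3),
          ' acc =', round(clf.score(X, y), 3))

Any optimizer string other than 'sgd' (here 'gd') falls through to the full-batch branch of fit.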

Lasso Logistic Regression

# Name: lasso_logistic_regression
# Author: Reacubeth
# Time: 2021/5/9 20:17
# Mail: noverfitting@gmail.com
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
 
import numpy as np
from numpy import linalg
 
 
class LassoLogisticRegression:
    def __init__(self, name, batch_size, learning_rate, max_iter, optimizer, lambda_):
        super().__init__()
        self.W = None
        self.name = name
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.optimizer = optimizer.lower()
        self.lambda_ = lambda_
 
    def fit(self, feature, label, verbose=False):
        num, dim = feature.shape
        self.W = np.random.normal(0, 1, (dim,))
 
        for t in range(self.max_iter):
            if self.optimizer == 'sgd':
                rand_pos = np.random.choice(num, self.batch_size)
                loss = self.cal_loss(feature[rand_pos, :], label[rand_pos])
                grad = self.cal_grad(feature[rand_pos, :], label[rand_pos])
            else:
                loss = self.cal_loss(feature, label)
                grad = self.cal_grad(feature, label)
            if verbose:
                print(self.name, '@epoch', t, ' loss: ', loss)
            self.W = self.W - self.learning_rate * grad
 
    def predict(self, feature, probability=False):
        if probability:
            return 1 / (1 + np.exp(-np.dot(feature, self.W)))
        else:
            return (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)

    def score(self, feature, label):
        pred_label = (1 / (1 + np.exp(-np.dot(feature, self.W))) > 0.5).astype(int)
        return np.where(pred_label == label)[0].shape[0] / feature.shape[0]

    def cal_loss(self, feature, label):
        num, dim = feature.shape
        return ((np.sum(np.log(1 + np.exp(np.dot(feature, self.W)))) - np.dot(label, np.dot(feature, self.W))) +
                self.lambda_ * linalg.norm(self.W, ord=1)) / num  # L1 penalty

    def cal_grad(self, feature, label):
        num, dim = feature.shape
        # sign(W) is a subgradient of the L1 penalty
        return (np.dot(feature.T, 1.0 / (1.0 + np.exp(-np.dot(feature, self.W))) - label) +
                self.lambda_ * np.sign(self.W)) / num
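One caveat: with plain (sub)gradient descent, the sign(W) term rarely makes weights exactly zero. A common alternative is proximal gradient descent (ISTA): take a gradient step on the smooth logistic part only, then apply soft-thresholding, which does zero out small coordinates. A minimal sketch of one such update, using the same lambda_ / num scaling as cal_loss above (an addition, not part of the original post):

import numpy as np

def soft_threshold(w, t):
    # proximal operator of t * ||.||_1: shrink each coordinate toward zero by t
    return np.sign(w) * np.maximum(np.abs(w) - t, 0.0)

def ista_step(W, feature, label, lr, lambda_):
    # gradient of the smooth (logistic) part only
    num = feature.shape[0]
    smooth_grad = np.dot(feature.T, 1.0 / (1.0 + np.exp(-np.dot(feature, W))) - label) / num
    # gradient step followed by the prox step
    return soft_threshold(W - lr * smooth_grad, lr * lambda_ / num)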

Kernel Logistic Regression

# Name: kernel_logistic_regression_lasso
# Author: Reacubeth
# Time: 2021/5/9 23:43
# Mail: noverfitting@gmail.com
# Site: www.omegaxyz.com
# *_*coding:utf-8 *_*
 
import numpy as np
from numpy import linalg
 
 
class KernelLogisticRegression:
    def __init__(self, name, kernel, batch_size, learning_rate, max_iter, optimizer, lambda_, kernel_para):
        super().__init__()
        self.W = None
        self.X_train = None  # training features, needed to build kernels at prediction time
        self.name = name
        self.kernel = kernel.lower()
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.optimizer = optimizer.lower()
        self.lambda_ = lambda_
        self.kernel_para = kernel_para
 
    def fit(self, feature, label, verbose=False):
        num, dim = feature.shape
        # dual formulation: one weight per training sample, so W has length num
        self.W = np.random.normal(0, 1, (num,))
        self.X_train = feature  # keep training samples for prediction-time kernels

        for t in range(self.max_iter):
            if self.optimizer == 'sgd':
                rand_pos = np.random.choice(num, self.batch_size)
                K_feature_rand = self.compute_kernel(feature[rand_pos, :], feature)
                loss = self.cal_loss(K_feature_rand, label[rand_pos])
                grad = self.cal_grad(K_feature_rand, label[rand_pos])
            else:
                K_feature = self.compute_kernel(feature, feature)
                loss = self.cal_loss(K_feature, label)
                grad = self.cal_grad(K_feature, label)
            if verbose:
                print(self.name, '@epoch', t, ' loss: ', loss)
            self.W = self.W - self.learning_rate * grad

    def compute_kernel(self, X, Y):
        # dispatch to the configured kernel function
        if self.kernel == 'rbf':
            return self.kernel_rbf(X, Y, self.kernel_para)
        elif self.kernel == 'poly':
            return self.kernel_poly(X, Y, self.kernel_para)
        elif self.kernel == 'cosine':
            return self.kernel_cosine(X, Y)
        else:
            raise NotImplementedError
 
    def predict(self, feature, probability=False):
        # kernel between the new samples and the stored training samples
        K = self.compute_kernel(feature, self.X_train)
        if probability:
            return 1 / (1 + np.exp(-np.dot(K, self.W)))
        else:
            return (1 / (1 + np.exp(-np.dot(K, self.W))) > 0.5).astype(int)

    def score(self, feature, label):
        pred_label = self.predict(feature)
        return np.where(pred_label == label)[0].shape[0] / feature.shape[0]
 
    def cal_loss(self, feature, label):
        num, dim = feature.shape
        logistic_loss = np.sum(np.log(1 + np.exp(np.dot(feature, self.W)))) - np.dot(label, np.dot(feature, self.W))
        return logistic_loss / num + self.lambda_ * linalg.norm(self.W, ord=1)

    def cal_grad(self, feature, label):
        num, dim = feature.shape
        logistic_grad = np.dot(feature.T, 1.0 / (1.0 + np.exp(-np.dot(feature, self.W))) - label)
        return logistic_grad / num + self.lambda_ * np.sign(self.W)
 
    @staticmethod
    def kernel_rbf(X, Y, sigma):
        # K(x, y) = exp(-||x - y||^2 / (2 sigma^2)), expanded via squared norms
        norm_X = linalg.norm(X, axis=1) ** 2
        norm_Y = linalg.norm(Y, axis=1) ** 2
        return np.exp(- (norm_X[:, None] + norm_Y[None, :] - 2 * np.dot(X, Y.T)) / (2 * sigma ** 2))

    @staticmethod
    def kernel_poly(X, Y, d):
        # homogeneous polynomial kernel (x . y)^d
        return np.dot(X, Y.T) ** d

    @staticmethod
    def kernel_cosine(X, Y):
        # cosine similarity divides by the norms themselves, not their squares
        norm_X = linalg.norm(X, axis=1)
        norm_Y = linalg.norm(Y, axis=1)
        return np.dot(X, Y.T) / (norm_X[:, None] * norm_Y[None, :])
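An illustrative end-to-end run (data and hyperparameters are sketch values, not from the original code): with the RBF kernel the model can separate concentric circles, which no linear logistic regression can:

import numpy as np

np.random.seed(0)
n = 100
theta = np.random.uniform(0, 2 * np.pi, 2 * n)
r = np.hstack([np.random.normal(1.0, 0.1, n),   # inner circle -> class 0
               np.random.normal(3.0, 0.1, n)])  # outer circle -> class 1
X = np.column_stack([r * np.cos(theta), r * np.sin(theta)])
y = np.hstack([np.zeros(n), np.ones(n)]).astype(int)

clf = KernelLogisticRegression('klr', kernel='rbf', batch_size=32,
                               learning_rate=0.1, max_iter=500, optimizer='gd',
                               lambda_=1e-4, kernel_para=1.0)
clf.fit(X, y)
print('train accuracy:', clf.score(X, y))  # should be well above 0.9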
