TensorFlow2.0实现逻辑回归二分类,多分类

2021-01-14 16:20:49 浏览数 (1)

代码路径:https://github.com/lilihongjava/leeblog_python/tree/master/tensorflow_logistic_regression

数据集iris.csv,sklearn 鸢尾花数据集

二分类

这里用二元的交叉熵作为二分类的损失函数,激活函数为sigmoid

代码语言:python
 # Binary case: a single sigmoid output unit, compiled with binary cross-entropy and Adam.
 layer0 = tf.keras.layers.Dense(1, input_shape=(x_data.shape[1],), activation='sigmoid')
 model = tf.keras.Sequential([layer0])
 model.compile(loss='binary_crossentropy', optimizer='adam')  

多分类

categorical_crossentropy:多类的对数损失,它是一个多分类损失函数,可以配合着softmax一起使用。

代码语言:python
 # Multi-class case: class_num softmax output units, compiled with categorical cross-entropy and Adam.
 layer0 = tf.keras.layers.Dense(class_num, input_shape=(x_data.shape[1],), activation='softmax')
 model = tf.keras.Sequential([layer0])
 model.compile(loss='categorical_crossentropy', optimizer='adam')

整体代码

代码语言:python
# encoding: utf-8
"""
@author: lee
@time: 2020/6/24 16:35
@file: main.py
@desc: 
"""
import pandas as pd
import tensorflow as tf
from tensorflow.python.keras.utils.np_utils import to_categorical

from util.common_util import arg_check_transformation, multiple_gpu_strategy


def model_builder(x_data, class_num):
    """Build and compile a single-layer logistic-regression Keras model.

    Args:
        x_data: Feature table; only ``x_data.shape[1]`` is read, and it is
            used as the width of the input layer.
        class_num: Number of target classes. Exactly 2 produces one sigmoid
            unit trained with binary cross-entropy; any other value produces
            ``class_num`` softmax units trained with categorical
            cross-entropy.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer).
    """
    # The two variants differ only in output width, activation and loss.
    if class_num == 2:
        units, activation, loss = 1, 'sigmoid', 'binary_crossentropy'
    else:
        units, activation, loss = class_num, 'softmax', 'categorical_crossentropy'

    dense = tf.keras.layers.Dense(units, input_shape=(x_data.shape[1],), activation=activation)
    net = tf.keras.Sequential([dense])
    net.compile(loss=loss, optimizer='adam')
    return net


def tf_logistic_regression(feature_column=None, label_column=None, class_num=None, gpu=None, input1=None,
                           output1=None):
    """Train a logistic-regression model on a CSV file and optionally save it.

    Args:
        feature_column: Feature column specification; normalised through
            ``arg_check_transformation`` with the ``"list_name_str"`` rule
            and then used to select columns from the loaded frame.
        label_column: Label column specification; normalised the same way.
        class_num: Number of classes; normalised with the ``"int"`` rule.
            A value of 2 selects the binary model, anything else the
            multi-class model.
        gpu: When truthy, the model is built inside the scope of the strategy
            returned by ``multiple_gpu_strategy`` and fitted on its dataset.
        input1: Path handed to ``pd.read_csv``.
        output1: When truthy, path handed to ``model.save``.

    Raises:
        Exception: If column selection, model construction or fitting fails.
            The original error is attached as ``__cause__``.
    """
    print("输入参数:", locals())
    feature_column = arg_check_transformation("list_name_str", "feature_column", feature_column)
    label_column = arg_check_transformation("list_name_str", "label_column", label_column)
    class_num = arg_check_transformation("int", "class_num", class_num)
    df = pd.read_csv(input1)
    try:
        x_data = df[feature_column]
        y_data = df[label_column]
        if class_num != 2 and y_data.shape[1] == 1:
            # Expand the single label column into one-hot columns. The width is
            # pinned to class_num so it always matches the softmax layer, even
            # when the highest class id does not occur in this file.
            y_data = pd.DataFrame(to_categorical(y_data, num_classes=class_num))
        if gpu:
            dataset, BATCH_SIZE, strategy = multiple_gpu_strategy(x_data, y_data)
            with strategy.scope():
                model = model_builder(x_data, class_num)
            # NOTE(review): no `epochs` is passed here, whereas the CPU path
            # below trains for 1000 epochs - confirm this difference is intended.
            model.fit(dataset.batch(BATCH_SIZE), verbose=False)
        else:
            model = model_builder(x_data, class_num)
            model.fit(x_data, y_data, epochs=1000)
    except Exception as err:
        # Keep the generic message callers see, but chain the root cause
        # explicitly so the traceback shows what actually went wrong.
        raise Exception("模型训练错误") from err
    print("模型训练完成")
    if output1:
        model.save(output1)


if __name__ == '__main__':
    iris_features = "sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)"
    # (class count, input CSV): the binary run first, then the three-class run.
    # Both runs pass the same output path, so the second save targets the same file.
    demo_runs = (
        (2, "./data/iris_two.csv"),
        (3, "./data/iris.csv"),
    )
    for n_classes, csv_path in demo_runs:
        tf_logistic_regression(feature_column=iris_features, label_column="target", class_num=n_classes,
                               gpu=False, input1=csv_path, output1="./data/output.h5")

0 人点赞