【模式识别】SVM实现人脸表情分类

2022-06-14 14:19:28 浏览数 (1)

前言

本文是模式识别课程关于支持向量机(SVM)算法的课程设计,根据人脸的面部特征,通过SVM算法将表情分为7类。 本文的jupyter文件和数据集下载地址: https://download.csdn.net/download/qq1198768105/66912662

数据集

本文采用的数据集为The Japanese Female Facial Expression (JAFFE) Dataset 数据集来源:https://zenodo.org/record/3451524#.YaeJztBByUl 共有七个类别:anger、disgust、fear、happiness、neutral、sadness、surprise

导库

代码语言:javascript复制
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn

from sklearn.svm import SVC
from skimage.feature import hog
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

数据预处理

读入图像,转换成灰度,大小转换成256*256,数据归一化

代码语言:javascript复制
def preprocessing(src, size=(256, 256)):
    """Turn a raw image into a fixed-size, scaled grayscale array.

    Args:
        src: Image array as returned by ``cv2.imread`` (BGR colour), or an
            image that is already single-channel (2-D array).
        size: ``(width, height)`` passed to ``cv2.resize``. Defaults to the
            256x256 used throughout this notebook.

    Returns:
        The resized grayscale image divided by 255.0 (a float array; for
        8-bit input the values fall in [0, 1]).
    """
    if src.ndim == 2:
        # Already grayscale: cvtColor(BGR2GRAY) would reject a 2-D input.
        gray = src
    else:
        gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)  # colour -> grayscale
    img = cv2.resize(gray, size)  # normalise the spatial size
    return img / 255.0  # scale pixel values

特征提取

采用Hog批量提取图片特征

采用方向梯度直方图(Histogram of Oriented Gradients,HOG)来提取特征。法国研究人员Dalal在2005年的CVPR上提出了HOG+SVM的方法,其优点是对图像的几何形变和光学形变都能保持很好的不变性。

关于HOG的原理和步骤流程可参考下面两篇文章。 https://blog.csdn.net/zouxy09/article/details/7929348 https://blog.csdn.net/qq_34106574/article/details/88317902

代码语言:javascript复制
def extract_hog_features(X):
    """Compute a HOG descriptor for every image in ``X``.

    HOG parameters used:
        orientations: number of orientation bins (9).
        pixels_per_cell: cell size in pixels (16x16).
        cells_per_block: block size in cells (16x16).
        block_norm: block normalisation method ('L2-Hys').

    Args:
        X: Iterable of preprocessed grayscale images.

    Returns:
        A list with one HOG feature vector per image, in input order.
    """
    # ``visualize`` is left at its default (False): the rendered HOG image
    # was computed and immediately thrown away, which only cost time.
    return [
        hog(image, orientations=9, pixels_per_cell=(16, 16),
            cells_per_block=(16, 16), block_norm='L2-Hys')
        for image in X
    ]

提取单张图片特征

代码语言:javascript复制
def extract_hog_features_single(X):
    """Compute the HOG descriptor of one image.

    Args:
        X: A single preprocessed grayscale image.

    Returns:
        A one-element list holding the HOG feature vector, i.e. the
        "list of samples" layout that a classifier's ``predict`` expects.
    """
    # Same HOG settings as the batch extractor; the visualisation image is
    # not requested because it was never used.
    fd = hog(X, orientations=9, pixels_per_cell=(16, 16),
             cells_per_block=(16, 16), block_norm='L2-Hys')
    return [fd]

读取数据

代码语言:javascript复制
def read_data(label2id, path='./jaffe'):
    """Load every readable image below ``path`` together with its label.

    The dataset root is expected to contain one sub-folder per expression;
    the folder name is looked up in ``label2id`` to obtain the class id.

    Args:
        label2id: Mapping from folder name to integer class id.
        path: Dataset root directory. Defaults to ``'./jaffe'``.

    Returns:
        ``(X, Y)`` where ``X`` is the list of preprocessed images and ``Y``
        the list of their class ids, in matching order.

    Raises:
        KeyError: If an image is found in a folder whose name is not a key
            of ``label2id``.
    """
    X = []
    Y = []
    # os.listdir order is arbitrary; sorting keeps the sample order (and so
    # any seeded train/test split) identical across machines.
    for label in sorted(os.listdir(path)):
        label_dir = os.path.join(path, label)
        if not os.path.isdir(label_dir):
            # Stray files in the root (README, .DS_Store, ...) are not classes.
            continue
        for img_file in sorted(os.listdir(label_dir)):
            image = cv2.imread(os.path.join(label_dir, img_file))
            if image is None:
                # cv2.imread returns None for files it cannot decode.
                continue
            X.append(preprocessing(image))
            Y.append(label2id[label])
    return X, Y

划分数据

训练集/测试集=7/3

代码语言:javascript复制
# Expression folder name -> integer class id (ids follow the listing order).
label2id = {
    name: idx
    for idx, name in enumerate(
        ['anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise']
    )
}

# Load the images, then turn each one into a HOG feature vector.
X, Y = read_data(label2id)
X_features = extract_hog_features(X)

# Hold out 30% of the samples for testing; the seed fixes the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_features,
    Y,
    test_size=0.3,
    random_state=42,
)

网格搜索选取SVM参数

对poly核进行网格搜索

代码语言:javascript复制
from sklearn.model_selection import KFold, GridSearchCV

# Grid search over the polynomial-kernel hyper-parameters, scored by
# shuffled 5-fold cross-validation on the training split.
gamma_range = np.logspace(-10, 1, 10)
coef0_range = np.linspace(0, 5, 10)
C_range = np.linspace(0.01, 30, 10)
# SVC's ``degree`` must be an integer: np.linspace produced floats
# (0.0, 1.0, ...), which recent scikit-learn versions reject. Degree 0 is
# dropped as well because it turns the polynomial kernel into a constant.
degree_range = np.arange(1, 11)
param_grid = dict(gamma=gamma_range,
                  coef0=coef0_range,
                  C=C_range,
                  degree=degree_range,
                  )
cv = KFold(n_splits=5, shuffle=True, random_state=520)
# n_jobs=-1: use every available CPU core.
grid = GridSearchCV(SVC(kernel="poly"), param_grid=param_grid, cv=cv, n_jobs=-1)
grid.fit(X_train, Y_train)
print("The best parameters are %s with a score of %0.5f" % (grid.best_params_, grid.best_score_))

对sigmoid核进行网格搜索

代码语言:javascript复制
from sklearn.model_selection import KFold, GridSearchCV

# Candidate values for the sigmoid-kernel hyper-parameters.
gamma_range = np.logspace(-10, 1, 10)
coef0_range = np.linspace(0, 5, 10)
C_range = np.linspace(0.01, 30, 20)
param_grid = {
    "gamma": gamma_range,
    "coef0": coef0_range,
    "C": C_range,
}

# Shuffled 5-fold cross-validation with a fixed seed.
cv = KFold(n_splits=5, shuffle=True, random_state=520)

# Exhaustive search on the training split; n_jobs=-1 uses all CPU cores.
grid = GridSearchCV(
    SVC(kernel="sigmoid"),
    param_grid=param_grid,
    cv=cv,
    n_jobs=-1,
).fit(X_train, Y_train)
print("The best parameters are %s with a score of %0.5f" % (grid.best_params_, grid.best_score_))

对rbf核进行网格搜索

代码语言:javascript复制
from sklearn.model_selection import KFold, GridSearchCV

# Candidate values for the RBF-kernel hyper-parameters.
gamma_range = np.logspace(-10, 1, 10)
C_range = np.linspace(0.01, 30, 20)
param_grid = {
    "gamma": gamma_range,
    "C": C_range,
}

# Shuffled 5-fold cross-validation with a fixed seed.
cv = KFold(n_splits=5, shuffle=True, random_state=520)

# Exhaustive search on the training split; n_jobs=-1 uses all CPU cores.
grid = GridSearchCV(
    SVC(kernel="rbf"),
    param_grid=param_grid,
    cv=cv,
    n_jobs=-1,
).fit(X_train, Y_train)
print("The best parameters are %s with a score of %0.5f" % (grid.best_params_, grid.best_score_))

对Linear核进行C取值搜索

代码语言:javascript复制
from sklearn.model_selection import KFold, cross_val_score

# Search C for the linear kernel.
# The candidates are scored with the same shuffled 5-fold cross-validation
# on the training split that the other kernels use. Scoring on X_test here
# would tune C on the test set (leaking it into model selection) and make
# the number incomparable with the cross-validated scores of the other
# kernels.
C_range = np.linspace(0.01, 30, 30)
cv = KFold(n_splits=5, shuffle=True, random_state=520)
best_acc = -1
best_c = -1
for c in C_range:
    clf = SVC(kernel="linear", C=c)
    acc = cross_val_score(clf, X_train, Y_train, cv=cv).mean()
    if acc > best_acc:  # strict '>' keeps the smallest C among ties
        best_acc = acc
        best_c = c
print("The best c is %0.5f with a score of %0.5f" % (best_c, best_acc))

结果分析:

在上面分别对多项式核(poly),双曲正切核(sigmoid),高斯径向基(rbf),线性核(linear)进行了网格搜索

poly的最佳准确率为75.7%,sigmoid的最佳准确率为70.4%,rbf的最佳准确率为72.4%,linear的最佳准确率为93.8%

因此选择线性核对该数据效果最佳,其中最佳的参数C选择为15.52。

探究各参数的影响

用rbf核探究gamma的影响

代码语言:javascript复制
# Effect of gamma on an RBF-kernel SVM: train one model per gamma value
# and record its accuracy on the test split.
score = []
gamma_range = np.logspace(-10, 1, 50)  # 50 values evenly spaced on a log scale
for gamma in gamma_range:
    clf = SVC(kernel="rbf", gamma=gamma, cache_size=5000).fit(X_train, Y_train)
    score.append(clf.score(X_test, Y_test))

# Best accuracy and the first gamma that reaches it.
best_idx = score.index(max(score))
print(score[best_idx], gamma_range[best_idx])

plt.plot(gamma_range, score)
# The gammas span 11 orders of magnitude; on a linear axis almost every
# point collapses onto x = 0, so the axis is drawn on a log scale.
plt.xscale('log')
plt.xlabel('gamma')
plt.ylabel('Accuracy')
plt.savefig('pt1.jpg')
plt.show()

用linear核探究C的影响

代码语言:javascript复制
# Effect of C on a linear-kernel SVM: train one model per C value and
# record its accuracy on the test split.
score = []
C_range = np.linspace(0.01, 30, 30)
for c in C_range:
    # This section studies the linear kernel; the kernel was previously
    # left at "rbf", apparently copied from the gamma experiment.
    clf = SVC(kernel="linear", C=c, cache_size=5000).fit(X_train, Y_train)
    score.append(clf.score(X_test, Y_test))

# Best accuracy and the first C that reaches it.
best_idx = score.index(max(score))
print(score[best_idx], C_range[best_idx])

# Draw the curve BEFORE saving: calling savefig ahead of plot wrote an
# image that contained only the empty, labelled axes.
plt.plot(C_range, score)
plt.xlabel('C')
plt.ylabel('Accuracy')
plt.savefig('pt2.jpg')
plt.show()

参数c影响了支持向量与决策平面之间的距离,c越大,分类越严格,不能有错误;c越小,意味着有更大的错误容忍度。

参数gamma控制将低维样本向高维空间映射的程度,gamma值越大映射的维度越高,在训练集上的拟合效果越好,但是越容易引起过拟合,即泛化能力越低。

和其它方式进行对比

线性核SVM(前面筛选出最好的C=15.52)

代码语言:javascript复制
# Linear-kernel SVM with C = 15.52, evaluated on the held-out test split.
svm = SVC(kernel='linear', C=15.52)
Y_predict = svm.fit(X_train, Y_train).predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_predict)
print('SVM准确率为: ', acc)

KNN准确率

代码语言:javascript复制
# k-nearest-neighbours baseline. k = 1 is used because, per the original
# author's note, the single nearest neighbour gave the higher accuracy.
knn = KNeighborsClassifier(n_neighbors=1)
Y_predict = knn.fit(X_train, Y_train).predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_predict)
print('KNN准确率为: ', acc)

决策树准确率

代码语言:javascript复制
# Decision-tree baseline with default settings (no random_state is set).
tree_D = DecisionTreeClassifier()
Y_predict = tree_D.fit(X_train, Y_train).predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_predict)
print('决策树准确率为: ', acc)

逻辑回归准确率

代码语言:javascript复制
# Logistic-regression baseline with default settings.
logistic = LogisticRegression()
Y_predict = logistic.fit(X_train, Y_train).predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_predict)
print('逻辑回归准确率为: ', acc)

朴素贝叶斯准确率

代码语言:javascript复制
# Gaussian naive-Bayes baseline with default settings.
mlt = GaussianNB()
Y_predict = mlt.fit(X_train, Y_train).predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_predict)
print('朴素贝叶斯准确率为: ', acc)

随机森林准确率

代码语言:javascript复制
# Random-forest baseline: 180 trees, seeded for repeatable results.
Forest = RandomForestClassifier(
    n_estimators=180,
    random_state=0,
)
Y_predict = Forest.fit(X_train, Y_train).predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_predict)
print('随机森林准确率为: ', acc)

SVM Bagging准确率

代码语言:javascript复制
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score

# Bagging ensemble of 20 polynomial-kernel SVMs, each trained on a
# bootstrap sample of the full training set using all features.
svc = SVC(C=20, kernel='poly')
# The base model is passed positionally: its keyword was renamed from
# ``base_estimator`` to ``estimator`` in newer scikit-learn releases (the
# old name was later removed), while the first positional slot is the base
# model under both names.
clf = BaggingClassifier(svc, n_estimators=20, max_samples=1.0, max_features=1.0,
                        bootstrap=True, bootstrap_features=False, n_jobs=-1, random_state=1)
clf.fit(X_train, Y_train)
Y_predict = clf.predict(X_test)
acc = accuracy_score(Y_test, Y_predict)
print('SVM Bagging准确率为: ', acc)

XGBoost准确率

代码语言:javascript复制
# NOTE: despite the "XGBR"/"reg" names this is a classifier; the names are
# kept so that other cells referring to them keep working.
from xgboost import XGBClassifier as XGBR

# XGBoost with a linear booster, 200 boosting rounds.
# ``eval_metric`` is no longer passed to fit(): that argument was
# deprecated and then removed from fit() in recent XGBoost releases, and
# it had no effect here because no ``eval_set`` was supplied.
# The feature lists are converted to arrays because not every XGBoost
# version accepts a plain Python list of vectors.
reg = XGBR(n_estimators=200,
           learning_rate=0.1,
           booster="gblinear",
           ).fit(np.asarray(X_train), np.asarray(Y_train))
Y_predict = reg.predict(np.asarray(X_test))
acc = accuracy_score(Y_test, Y_predict)
print('XGBoost准确率为: ', acc)

各方法结果:

| 分类器 | 最佳准确率 |
| --- | --- |
| SVM | 93.75% |
| KNN | 85.94% |
| 决策树 | 40.63% |
| 逻辑回归 | 45.31% |
| 朴素贝叶斯 | 60.94% |
| 随机森林 | 65.63% |
| SVM Bagging | 93.75% |
| XGBoost | 93.75% |

绘制SVM分类结果的混淆矩阵

代码语言:javascript复制
# confusion_matrix is imported here because the notebook's import cell
# does not bring it in.
from sklearn.metrics import confusion_matrix

# Predict with the linear SVM explicitly: at this point ``Y_predict``
# holds the output of the last classifier that was evaluated, not the
# SVM's, so reusing it would plot the wrong model.
Y_predict_svm = svm.predict(X_test)

xtick = ['anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise']
ytick = xtick
# Pin the label set so the matrix is always 7x7 and lines up with the tick
# names even when some class is absent from the test split.
cm = confusion_matrix(Y_test, Y_predict_svm, labels=list(range(len(xtick))))

f, ax = plt.subplots(figsize=(7, 5))
ax.tick_params(axis='y', labelsize=15)
ax.tick_params(axis='x', labelsize=15)

seaborn.set(font_scale=1.2)
plt.rc('font', family='Times New Roman', size=15)
# fmt='g' prints the cell counts as plain numbers.
seaborn.heatmap(cm, fmt='g', cmap='Blues', annot=True, cbar=True, xticklabels=xtick, yticklabels=ytick, ax=ax)
plt.title('Confusion Matrix', fontsize='x-large')
f.savefig('./混淆矩阵.png')
plt.show()

尝试导入单张图片查看分类效果

这里选用准确率最高的SVM做分类器

代码语言:javascript复制
# ``display`` is imported explicitly: it is only predefined inside
# IPython/Jupyter sessions.
from IPython.display import Image, display

# Train the linear SVM used for the single-image demo.
svm = SVC(C=15.52, kernel='linear')
svm.fit(X_train, Y_train)

path = './test_pic.jpg'
image = cv2.imread(path)
if image is None:
    # cv2.imread signals a missing/undecodable file by returning None;
    # fail here with a clear message instead of deep inside OpenCV.
    raise FileNotFoundError('cannot read image: ' + path)
display(Image(path))

# Same pipeline as for the training data: preprocess, then HOG features.
result = preprocessing(image)
X_Single = extract_hog_features_single(result)

# The classifier to use can be swapped here.
predict = svm.predict(X_Single)

# Class id -> name to print. ``predict`` is a one-element array, so its
# single entry is looked up instead of comparing the array to scalars.
id2name = {
    0: 'angry',
    1: 'disgust',
    2: 'fear',
    3: 'happy',
    4: 'neutral',
    5: 'sad',
    6: 'surprise',
}
expression = id2name.get(int(predict[0]))
if expression is not None:
    print(expression)

0 人点赞