以下是使用 scikit-learn 进行预测、绘制决策边界并画出 ROC 曲线的一个示例,以鸢尾花数据集为例。
1. 导入鸢尾花的数据
import numpy as np
import matplotlib.pyplot as plt
import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
# Silence all warnings for the remainder of the script.
warnings.filterwarnings('ignore')

# Load iris, then keep only the samples whose label is below 2 and only the
# first two feature columns, which makes the data easy to visualise.
iris = datasets.load_iris()
X = iris.data[iris.target < 2, :2]
y = iris.target[iris.target < 2]
2. 标准化数据并使用SVM预测
# Split BEFORE scaling: the scaler must only see training samples, otherwise
# the mean/variance of the test samples leak into the preprocessing step.
# test_size=0.75 holds out 75% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.75, random_state=1
)

# Learn the scaling parameters from the training split only, then apply the
# same transformation to both splits.
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train = standardScaler.transform(X_train)
X_test = standardScaler.transform(X_test)

# Whole data set on the same scale as the training split (name kept so any
# later code that refers to X_standard keeps working).
X_standard = standardScaler.transform(X)

# Linear SVM; in scikit-learn a smaller C means stronger regularisation.
svc2 = LinearSVC(C=0.001)
svc2.fit(X_train, y_train)
3. 做出决策边界
# Decision-boundary plotting helper
def plot_boundary(model, X, y, *, margin=.5, h=.02):
    """Draw the regions predicted by ``model`` with the samples on top.

    Args:
        model: Fitted estimator; its ``predict`` method is called on an
            array with two columns (one row per mesh point).
        X: 2-D array; column 0 is the horizontal axis, column 1 the
            vertical axis.
        y: Values used to colour the scattered samples.
        margin: Padding added around the data range on both axes.
        h: Step size of the mesh on which ``model`` is evaluated.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin

    # Evaluate the model on every point of a regular grid covering the data.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure(1, figsize=(4, 3))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Set3)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.Greens)
    plt.show()


plot_boundary(svc2, X_train, y_train)
4. ROC曲线
# ROC curve of the linear SVM on the test split.
# LinearSVC does not implement predict_proba, so the signed distance to the
# separating hyperplane (decision_function) is used as the ranking score;
# roc_curve and roc_auc_score accept such non-thresholded scores.
y_score = svc2.decision_function(X_test)
fpr, tpr, _ = metrics.roc_curve(y_test, y_score)
auc = metrics.roc_auc_score(y_test, y_score)

plt.plot(fpr, tpr, label='SVM model AUC %0.2f' % auc, color='blue', lw=2)
# Diagonal reference line: the ROC curve of a random classifier.
plt.plot([0, 1], [0, 1], color='black', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating Curve')
plt.legend(loc="lower right")
plt.show()
示例数据集比较简单,所以效果非常好,一般的数据集画出的效果如下: