机器学习 (五): 模型评估

约定不等于承诺〃 2022-02-22 19:22 436阅读 0赞

机器学习 模型评估

  • 交叉验证
  • Baseline 模型
    • 数值型baseline
    • 分类型baseline
  • ROC 曲线
  • Confusion Matrix

交叉验证

  1. from sklearn.preprocessing import StandardScaler
  2. from sklearn.pipeline import make_pipeline
  3. from sklearn.linear_model import LogisticRegression
  4. from sklearn.model_selection import KFold, cross_val_score
  5. from sklearn.datasets import load_digits
  6. # 加载数据 (手写数字图像)
  7. digits = load_digits()
  8. features = digits.data
  9. target = digits.target
  10. # 创建一个流水线, 流水线由
  11. # 将输入特征变换为0均值,1方差的缩放器
  12. # 逻辑回归模型
  13. # 组成
  14. pipeline = make_pipeline(StandardScaler(), LogisticRegression())
  15. # 交叉验证, Fold=10
  16. cv_res = cross_val_score(pipeline, features, target, cv=KFold(10, shuffle=True, random_state=1), scoring='accuracy', n_jobs=-1)
  17. print(cv_res.mean())

Baseline 模型

数值型baseline

  1. from sklearn.datasets import load_boston
  2. from sklearn.linear_model import LinearRegression
  3. from sklearn.preprocessing import train_test_split
  4. from sklearn.dummy import DummyRegressor
  5. from sklearn.model_selection import train_test_split
  6. # 加载数据
  7. boston = load_boston()
  8. features, target = boston.data, boston.target
  9. x_train, x_test, y_train, y_test = train_test_split(features, target, test_size = 0.2)
  10. # 预处理, 缩放数据
  11. std_scaler = StandardScaler()
  12. std_scaler.fit(x_train)
  13. x_train = std_scaler.transform(x_train)
  14. x_test = std_scaler.transform(x_test)
  15. # baselien model
  16. baseline = DummyRegressor(strategy='mean')
  17. baseline.fit(x_train, y_train)
  18. baseline.score(x_test, y_test)
  19. # 得分 -0.05
  20. # my model
  21. clf = LinearRegression()
  22. clf.fit(x_train, y_train)
  23. clf.score(x_test, y_test)
  24. # 得分 0.74

分类型baseline

  1. from sklearn.datasets import load_iris
  2. from sklearn.preprocessing import StandardScaler
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.dummy import DummyClassifier
  5. from sklearn.linear_model import LogisticRegression
  6. # 加载数据
  7. iris = load_iris()
  8. features, target = iris.data, iris.target
  9. x_train, x_test, y_train, y_test = train_test_split(features, target, test_size = 0.2)
  10. # 预处理
  11. std_scaler = StandardScaler()
  12. std_scaler.fit(x_train)
  13. x_train = std_scaler.transform(x_train)
  14. x_test = std_scaler.transform(x_test)
  15. # baseline
  16. baseline = DummyClassifier(strategy='stratified', random_state=1)
  17. baseline.fit(x_train, y_train)
  18. print(baseline.score(x_test, y_test))
  19. # 得分 0.4
  20. # my model
  21. clf = LogisticRegression(solver='lbfgs', multi_class='auto')
  22. clf.fit(x_train, y_train)
  23. clf.score(x_test, y_test)
  24. # 得分 0.97

ROC 曲线

  1. from sklearn.datasets import make_classification
  2. from sklearn.linear_model import LogisticRegression
  3. from sklearn.metrics import roc_curve, roc_auc_score
  4. from sklearn.model_selection import train_test_split
  5. from sklearn.preprocessing import StandardScaler
  6. from sklearn.dummy import DummyClassifier
  7. import matplotlib.pyplot as plt
  8. %matplotlib inline
  9. features, target = make_classification(n_samples = 1000, n_features = 10, n_informative = 2, n_classes = 2)
  10. x_train, x_test, y_train, y_test = train_test_split(features, target, test_size = 0.2)
  11. std_scaler = StandardScaler()
  12. std_scaler.fit(x_train)
  13. x_train = std_scaler.transform(x_train)
  14. x_test = std_scaler.transform(x_test)
  15. baseline = DummyClassifier(strategy='stratified')
  16. baseline.fit(x_train, y_train)
  17. y_pred = baseline.predict_proba(x_test)[:,1]
  18. fp, tp, threshold = roc_curve(y_test, y_pred)
  19. plt.plot(fp, tp, label='baseline (%f)' % roc_auc_score(y_test, y_pred))
  20. clf = LogisticRegression(solver='lbfgs')
  21. clf.fit(x_train, y_train)
  22. y_pred = clf.predict_proba(x_test)[:,1]
  23. fp, tp, threshold = roc_curve(y_test, y_pred)
  24. plt.plot(fp, tp, label='mine (%f)' % roc_auc_score(y_test, y_pred))
  25. plt.legend()

0069McTXly1g1wk5qyzs5j30af070t8r.jpg

Confusion Matrix

  1. from sklearn.datasets import load_iris
  2. from sklearn.preprocessing import StandardScaler
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.metrics import confusion_matrix
  5. from sklearn.linear_model import LogisticRegression
  6. import seaborn as sns
  7. # 加载数据
  8. iris = load_iris()
  9. features, target = iris.data, iris.target
  10. x_train, x_test, y_train, y_test = train_test_split(features, target, test_size = 0.5)
  11. # 预处理
  12. std_scaler = StandardScaler()
  13. std_scaler.fit(x_train)
  14. x_train = std_scaler.transform(x_train)
  15. x_test = std_scaler.transform(x_test)
  16. # my model
  17. clf = LogisticRegression(solver='lbfgs', multi_class='auto')
  18. clf.fit(x_train, y_train)
  19. y_pred = clf.predict(x_test)
  20. sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, cmap='Blues')

0069McTXly1g1wki3awx9j309m070745.jpg

发表评论

表情:
评论列表 (有 0 条评论,436人围观)

还没有评论,来说两句吧...

相关阅读